GorocksDB和BoltDB读写性能测试

由于项目需求,考虑使用rocksdb来存储元数据(metadata),以实现数据的快速读写。那么,是使用对rocksdb做了go封装的gorocksdb,还是使用完全用go实现的boltdb,需要通过测试来决定。


一、搭建环境

安装原生的rocksdb(https://github.com/facebook/rocksdb),可参考:http://blog.jeffli.me/blog/2016/12/02/getting-started-with-rocksdb-in-centos-7/

gorocksdb安装:https://github.com/tecbot/gorocksdb

>在安装的过程中遇到stdatomic的问题,升级g++:http://blog.csdn.net/lavorange/article/details/70854459

>go build的过程中报错:


# github.com/tecbot/gorocksdb
../github.com/tecbot/gorocksdb/backup.go:4:24: fatal error: rocksdb/c.h: No such file or directory
// #include "rocksdb/c.h"
compilation terminated.

>go build -tags=embed

boltdb安装:https://github.com/boltdb/bolt


二、编写benchmark

gorocksdb.go:

package main

import (
	"crypto/md5"
	"encoding/hex"
	"fmt"
	"log"
	"math/rand"
	"os"
	"strconv"
	"time"

	"github.com/tecbot/gorocksdb"
)

func main() {
	concurrency, _ := strconv.Atoi(os.Args[1])
	tasks, _ := strconv.Atoi(os.Args[2])
	disks, _ := strconv.Atoi(os.Args[3])
	partitions, _ := strconv.Atoi(os.Args[4])

	dbPaths := listDBPaths('b', disks, partitions)
	dbs := openDBs(dbPaths)
	ropt := gorocksdb.NewDefaultReadOptions()
	wopt := gorocksdb.NewDefaultWriteOptions()

	wch := make(chan time.Duration, concurrency)
	wsuccess := make(chan int, concurrency)

	rch := make(chan time.Duration, concurrency)
	rsuccess := make(chan int, concurrency)
	srcs := prepareData(1000)

	keys := populateData(1000, srcs, dbs)

	for t := 0; t < 10; t++ {
		log.Printf("start to benchmark...")
		begin := time.Now()
		for i := 0; i < concurrency; i++ {
			go func(id int) {
				var rduration time.Duration
				var wduration time.Duration
				rsucc := 0
				wsucc := 0

				for j := 0; j < tasks; j++ {
					di := rand.Intn(len(dbs))
					db := dbs[di]

					if rand.Intn(100) <= 20 {
						key := generateKey()
						data := srcs[rand.Intn(len(srcs))].Data

						start := time.Now()
						if err := db.Put(wopt, []byte(key), data); err == nil {
							wduration += time.Since(start)
							wsucc++
						}
					} else {
						key := keys[di][rand.Intn(len(keys[di]))]

						start := time.Now()
						if _, err := db.Get(ropt, []byte(key)); err == nil {
							rduration += time.Since(start)
							rsucc++
						}
					}
				}

				rch <- rduration
				rsuccess <- rsucc
				wch <- wduration
				wsuccess <- wsucc
			}(i)
		}

		var relapsed, welapsed time.Duration
		var rcount, wcount int64
		for i := 0; i < concurrency; i++ {
			relapsed += <-rch
			welapsed += <-wch
			rcount += int64(<-rsuccess)
			wcount += int64(<-wsuccess)
		}

		d := time.Since(begin)

		log.Printf("For read requests:")
		log.Printf("it took %s", relapsed)
		log.Printf("success requests: %d", rcount)
		log.Printf("time cost per request: %.2fms", float64((relapsed/time.Millisecond))/float64(rcount))
		log.Printf("qps: %.2f\n\n", float64(rcount*1000)/float64(d/time.Millisecond))
		log.Printf("For write requests:")
		log.Printf("it took %s", welapsed)
		log.Printf("success requests: %d", wcount)
		log.Printf("time cost per request: %.2fms", float64((welapsed/time.Millisecond))/float64(wcount))
		log.Printf("qps: %.2f\n\n\n", float64(wcount*1000)/float64(d/time.Millisecond))

		time.Sleep(time.Second * 600)
	}

	for _, d := range dbs {
		d.Close()
	}
}

// generateKey builds an object path of the form
// "/tenantNNNNNN/containerNNNN/<nanoseconds>": a random tenant number in
// [0,100), a random container number in [0,10) and the current Unix time in
// nanoseconds as the object name.
func generateKey() string {
	tenant := fmt.Sprintf("tenant%06d", rand.Intn(100))
	container := fmt.Sprintf("container%04d", rand.Intn(10))
	object := strconv.FormatInt(time.Now().UnixNano(), 10)

	return "/" + tenant + "/" + container + "/" + object
}

// src is one pre-generated payload used as a value in the benchmark.
type src struct {
	// Data is the raw payload written to the database.
	Data     []byte
	// Checksum is the hex-encoded MD5 digest of Data.
	Checksum string
}

// prepareData generates n random 256-byte payloads, each paired with the
// hex-encoded MD5 digest of its contents. It returns nil when n <= 0.
func prepareData(n int) []src {
	var payloads []src
	for len(payloads) < n {
		body := Bytes(256)
		digest := md5.Sum(body)
		payloads = append(payloads, src{
			Data:     body,
			Checksum: hex.EncodeToString(digest[:]),
		})
	}

	return payloads
}

// openDBs opens (creating if missing) one RocksDB instance per path and
// returns the handles in the same order as paths. All instances share one
// option set: compression disabled and a 640 MiB write buffer. The process
// is terminated via log.Fatal if any database cannot be opened.
func openDBs(paths []string) []*gorocksdb.DB {
	const writeBufferBytes = 640 << 20 // 671088640

	options := gorocksdb.NewDefaultOptions()
	options.SetCreateIfMissing(true)
	options.SetCompression(gorocksdb.NoCompression)
	options.SetWriteBufferSize(writeBufferBytes)

	var handles []*gorocksdb.DB
	for i := range paths {
		handle, err := gorocksdb.OpenDb(options, paths[i])
		if err != nil {
			log.Fatal(err)
		}
		handles = append(handles, handle)
	}

	return handles
}

// listDBPaths returns the database directories "/srv/node/sd<letter>/<n>"
// for disks consecutive device letters starting at begin, with partitions
// numbered directories (0..partitions-1) on each disk. Paths are ordered by
// disk first, then by partition.
func listDBPaths(begin rune, disks int, partitions int) []string {
	var paths []string

	for disk := 0; disk < disks; disk++ {
		letter := begin + rune(disk)
		for part := 0; part < partitions; part++ {
			paths = append(paths, fmt.Sprintf("/srv/node/sd%c/%d", letter, part))
		}
	}

	return paths
}

// populateData writes up to num randomly chosen payloads from srcs into every
// database in dbs using synchronous writes, each under a freshly generated
// key. It returns, per database (same index as dbs), the keys that were
// written successfully.
//
// Failed writes are skipped; the number of failures and the first error are
// logged once per database. If srcs is empty nothing is written and every
// returned key list is empty.
func populateData(num int, srcs []src, dbs []*gorocksdb.DB) [][]string {
	keys := make([][]string, len(dbs))
	if len(srcs) == 0 {
		// rand.Intn(0) would panic below; there is nothing to write anyway.
		log.Printf("populateData: no source payloads supplied; nothing written")
		return keys
	}

	wopt := gorocksdb.NewDefaultWriteOptions()
	// The write options wrap a C object that must be released explicitly.
	defer wopt.Destroy()
	wopt.SetSync(true)

	for i, db := range dbs {
		var (
			ks       []string
			failures int
			firstErr error
		)
		for j := 0; j < num; j++ {
			data := srcs[rand.Intn(len(srcs))].Data
			p := generateKey()

			if err := db.Put(wopt, []byte(p), data); err != nil {
				if firstErr == nil {
					firstErr = err
				}
				failures++
				continue
			}
			ks = append(ks, p)
		}
		if failures > 0 {
			log.Printf("populateData: database #%d: %d of %d writes failed, first error: %v", i, failures, num, firstErr)
		}

		keys[i] = ks
	}

	return keys
}

// checksum returns the MD5 digest of data as a lowercase hexadecimal string.
func checksum(data []byte) string {
	hasher := md5.New()
	hasher.Write(data)
	return hex.EncodeToString(hasher.Sum(nil))
}

// Bytes returns a new slice of n pseudo-random bytes drawn from math/rand.
func Bytes(n int) []byte {
	buf := make([]byte, n)
	// math/rand's Read is documented to always fill the slice and return a
	// nil error, so both results are deliberately discarded.
	_, _ = rand.Read(buf)

	return buf
}
boltdb.go:

package main
import (
    "crypto/md5"
    "encoding/hex"
    "fmt"
    "log"
    "math/rand"
    "os"
    "strconv"
    "time"
    "github.com/boltdb/bolt"
)
func main() {
    concurrency, _ := strconv.Atoi(os.Args[1])
    tasks, _ := strconv.Atoi(os.Args[2])
    disks, _ := strconv.Atoi(os.Args[3])
    partitions, _ := strconv.Atoi(os.Args[4])
    dbPaths := listDBPaths('b', disks, partitions)
    dbs := openDBs(dbPaths)
    wch := make(chan time.Duration, concurrency)
    wsuccess := make(chan int, concurrency)
    rch := make(chan time.Duration, concurrency)
    rsuccess := make(chan int, concurrency)
    srcs := prepareData(1000)
    keys := populateData(1000, srcs, dbs)
    di := rand.Intn(len(dbs))
    db := dbs[di]
    //var bucket *bolt.Bucket
    //var err error
    db.Update(func(tx *bolt.Tx) error {
        //bucket, err = tx.CreateBucketIfNotExists([]byte("iqiyi"))
        _, err := tx.CreateBucketIfNotExists([]byte("iqiyi"))
        if err != nil {
            return err
        }
        return nil
    })
    for t := 0; t < 3; t++ {
        log.Printf("start to benchmark...")
        begin := time.Now()
        for i := 0; i < concurrency; i++ {
            go func(id int) {
                var rduration time.Duration
                var wduration time.Duration
                rsucc := 0
                wsucc := 0
                for j := 0; j < tasks; j++ {
                    if rand.Intn(100) <= 20 {
                        key := generateKey()
                        data := srcs[rand.Intn(len(srcs))].Data
                        start := time.Now()
                        if err := db.Update(func(tx *bolt.Tx) error {
                            b := tx.Bucket([]byte("iqiyi"))
                            err := b.Put([]byte(key), []byte(data))
                            return err
                        }); err == nil {
                            wduration += time.Since(start)
                            wsucc++
                        }
                    } else {
                        key := keys[di][rand.Intn(len(keys[di]))]
                        start := time.Now()
                        if err := db.View(func(tx *bolt.Tx) error {
                            b := tx.Bucket([]byte("iqiyi"))
                            _ = b.Get([]byte(key))
                            return nil
                        }); err == nil {
                            rduration += time.Since(start)
                            rsucc++
                        }
                    }
                }
                rch <- rduration
                rsuccess <- rsucc
                wch <- wduration
                wsuccess <- wsucc
            }(i)
        }
        var relapsed, welapsed time.Duration
        var rcount, wcount int64
        for i := 0; i < concurrency; i++ {
            relapsed += <-rch
            welapsed += <-wch
            rcount += int64(<-rsuccess)
            wcount += int64(<-wsuccess)
        }
        d := time.Since(begin)
        log.Printf("For read requests:")
        log.Printf("it took %s", relapsed)
        log.Printf("success requests: %d", rcount)
        log.Printf("time cost per request: %.6fms", float64((relapsed/time.Millisecond))/float64(rcount))
        log.Printf("qps: %.2f\n\n", float64(rcount*1000)/float64(d/time.Millisecond))
        log.Printf("For write requests:")
        log.Printf("it took %s", welapsed)
        log.Printf("success requests: %d", wcount)
        log.Printf("time cost per request: %.6fms", float64((welapsed/time.Millisecond))/float64(wcount))
        log.Printf("qps: %.2f\n\n\n", float64(wcount*1000)/float64(d/time.Millisecond))
        time.Sleep(time.Second * 10)
    }
    for _, d := range dbs {
        d.Close()
    }
}
// generateKey returns a path-like key "/tenantNNNNNN/containerNNNN/<ns>",
// where the tenant number is random in [0,100), the container number is
// random in [0,10) and <ns> is the current Unix time in nanoseconds.
func generateKey() string {
	tenantPart := fmt.Sprintf("tenant%06d", rand.Intn(100))
	containerPart := fmt.Sprintf("container%04d", rand.Intn(10))
	objectPart := strconv.FormatInt(time.Now().UnixNano(), 10)
	return "/" + tenantPart + "/" + containerPart + "/" + objectPart
}
// src is one pre-generated payload used as a value in the benchmark.
type src struct {
    // Data is the raw payload written to the database.
    Data     []byte
    // Checksum is the hex-encoded MD5 digest of Data.
    Checksum string
}
// prepareData builds n random 256-byte payloads and records the hex-encoded
// MD5 digest of each one. A non-positive n yields a nil slice.
func prepareData(n int) []src {
	var out []src
	for remaining := n; remaining > 0; remaining-- {
		payload := Bytes(256)
		sum := md5.Sum(payload)
		entry := src{Data: payload, Checksum: hex.EncodeToString(sum[:])}
		out = append(out, entry)
	}
	return out
}
// openDBs opens one BoltDB file per path with mode 0600 and a one-second
// open timeout, returning the handles in the same order as paths. The
// process is terminated via log.Fatal if any file cannot be opened.
func openDBs(paths []string) []*bolt.DB {
	var handles []*bolt.DB
	for i := range paths {
		timeout := 1 * time.Second
		handle, err := bolt.Open(paths[i], 0600, &bolt.Options{Timeout: timeout})
		if err != nil {
			log.Fatal(err)
		}
		handles = append(handles, handle)
	}
	return handles
}
// listDBPaths enumerates "/srv/node/sd<letter>/<n>" for disks consecutive
// device letters starting at begin and partition numbers 0..partitions-1,
// ordered by disk and then by partition.
func listDBPaths(begin rune, disks int, partitions int) []string {
	var paths []string
	for d := 0; d < disks; d++ {
		device := begin + rune(d)
		for p := 0; p < partitions; p++ {
			paths = append(paths, fmt.Sprintf("/srv/node/sd%c/%d", device, p))
		}
	}
	return paths
}
// populateData writes up to num randomly chosen payloads from srcs into the
// "iqiyi" bucket of every database in dbs, creating the bucket if needed and
// using one update transaction per key. It returns, per database (same index
// as dbs), the keys that were written successfully.
//
// Failed writes are skipped; the number of failures and the first error are
// logged once per database. If srcs is empty nothing is written and every
// returned key list is empty.
func populateData(num int, srcs []src, dbs []*bolt.DB) [][]string {
	keys := make([][]string, len(dbs))
	if len(srcs) == 0 {
		// rand.Intn(0) would panic below; there is nothing to write anyway.
		log.Printf("populateData: no source payloads supplied; nothing written")
		return keys
	}

	bucket := []byte("iqiyi")
	for i, db := range dbs {
		var (
			ks       []string
			failures int
			firstErr error
		)
		for j := 0; j < num; j++ {
			data := srcs[rand.Intn(len(srcs))].Data
			p := generateKey()
			err := db.Update(func(tx *bolt.Tx) error {
				b, err := tx.CreateBucketIfNotExists(bucket)
				if err != nil {
					return err
				}
				return b.Put([]byte(p), data)
			})
			if err != nil {
				if firstErr == nil {
					firstErr = err
				}
				failures++
				continue
			}
			ks = append(ks, p)
		}
		if failures > 0 {
			log.Printf("populateData: database #%d: %d of %d writes failed, first error: %v", i, failures, num, firstErr)
		}
		keys[i] = ks
	}
	return keys
}
// checksum hashes data with MD5 and returns the digest in lowercase hex.
func checksum(data []byte) string {
	digest := md5.New()
	digest.Write(data)
	return hex.EncodeToString(digest.Sum(nil))
}
// Bytes allocates n bytes and fills them with pseudo-random data from
// math/rand.
func Bytes(n int) []byte {
	out := make([]byte, n)
	// math/rand's Read is documented to always fill the slice and return a
	// nil error, so both results are deliberately discarded.
	_, _ = rand.Read(out)
	return out
}

三、测试数据

写入数据大小:256Bytes

gorocksdb       read write
读写比 concurrency task disks partitions time cost per request(ms) qps time cost per request(ms) qps
1/1 1 10000 1 1 0.002192 6628 0.147531 6581
          0.002661 4886 0.147047 5114
          0.002641 4922 0.147302 5078
  1 100000 1 1 0.002513 6403 0.147375 6573
          0.00274 6285 0.147431 6568
          0.002873 6328 0.147225 6568
  10 10000 1 1 0.004166 6306 1.42332 6573
          0.004538 6294 1.421758 6572
          0.005053 6285 1.424145 6572
  10 100000 1 1 0.004472 6319 1.430665 6572
          0.004839 6309 1.429804 6572
          0.004883 6322 1.434386 6572
读写比 concurrency task disks partitions time cost per request(ms) qps time cost per request(ms) qps
2/1 1 10000 1 1 0.001956 13056 0.1452 6589
          0.00213 168564 0.00467 87846
          0.001984 177135 0.004643 93135
  1 100000 1 1 0.002195 12791 0.145497 6569
          0.002455 12739 0.144994 6569
          0.0026 12614 0.14476 6568
  10 10000 1 1 0.003442 12728 1.41615 6573
          0.004326 12689 1.424346 6570
          0.003779 12549 1.414829 6570
  10 100000 1 1 0.003906 13140 1.390111 6776
          0.003559 258863 0.036561 133292
          0.003715 259169 0.037855 133295
读写比 concurrency task disks partitions time cost per request(ms) qps time cost per request(ms) qps
4/1 1 10000 1 1 0.001879 266166 0.00397 67166
          0.002014 233617 0.004861 60500
          0.002021 247437 0.004803 65062
  1 100000 1 1 0.002095 236567 0.005108 61940
          0.002156 229250 0.005015 61447
          0.002181 231973 0.005159 62144
  10 10000 1 1 0.002965 458075 0.0146 119959
          0.003317 431683 0.014522 114765
          0.00319 443735 0.014989 118061
  10 100000 1 1 0.003069 446782 0.013929 118508
          0.003161 449578 0.014084 119573
          0.003254 447419 0.013919 118832
                 
boltdb         read write
读写比 concurrency task disks partitions time cost per request(ms) qps time cost per request(ms) qps
1/1 1 10000 1 1 0.006532 1065 0.888649 1109
          0.00671 1095 0.87072 1131
          0.006783 1080 0.86407 1140
  1 100000 1 1 0.006952 1150 0.828435 1187
          0.006498 1324 0.715377 1373
          0.006452 1663 0.561383 1743
  10 10000 1 1 0.006708 2064 3.720686 2131
          0.006355 2687 3.093097 2786
          0.006616 3622 2.361014 3797
  10 100000 1 1 0.005509 4398 2.03813 4579
          0.005502 4394 2.05086 4576
          0.005514 4375 2.097717 4567
2/1 1 10000 1 1 0.004645 9269 0.203247 4619
          0.004695 8839 0.206359 4547
          0.004593 8744 0.20271 4642
  1 100000 1 1 0.004583 9003 0.203576 4615
          0.004593 8970 0.203027 4627
          0.004658 8891 0.204177 4599
  10 10000 1 1 0.004825 8676 1.869094 4447
          0.005624 8217 2.051071 4239
          0.004901 8564 1.906171 4430
  10 100000 1 1 0.005038 8549 2.109865 4400
          0.00495 8600 2.1919 4443
          0.004968 8640 2.092398 4457
4/1 1 10000 1 1 0.004395 16799 0.210604 4297
          0.004039 17073 0.20308 4478
          0.004058 16461 0.206619 4415
  1 100000 1 1 0.004016 17254 0.200692 4536
          0.004095 16979 0.20321 4479
          0.004084 16876 0.201144 4527
  10 10000 1 1 0.00466 16509 2.154042 4340
          0.00484 16237 2.117413 4283
          0.004718 16022 2.17991 4298
  10 100000 1 1 0.004644 16482 2.188008 4370
          0.004745 16425 2.217973 4364
          0.004661 16418 2.196645 4371


四、结论

1.从读性能上来说,boltdb与gorocksdb在同一个数量级上,速度都很快,在微秒级别。

2.从写性能上来看,boltdb的写速度比gorocksdb稍慢,但是在同一个数量级。

3.在并发量扩大十倍(10个并发相比于单并发)的情况下,读性能基本没有受到影响,boltdb单次写入的耗时也增加了一个数量级。


Author:忆之独秀

Email:[email protected]

注明出处:http://blog.csdn.net/lavorange/article/details/74566724


猜你喜欢

转载自blog.csdn.net/lavorange/article/details/74566724