测试类型转换和类型判断的效率
package main

import "testing"

// InterfaceA and InterfaceB are two unrelated single-method interfaces
// used to compare the cost of dynamic dispatch vs direct calls.
type InterfaceA interface{ AA() }
type InterfaceB interface{ BB() }

type A struct{ v int }
type B struct{ v int }

func (a *A) AA() { a.v++ }
func (b *B) BB() { b.v++ }

// TypeSwitch dispatches through a type switch. Binding the switched
// value (`switch t := v.(type)`) is the idiomatic form and avoids the
// redundant second type assertion the original performed inside each
// case (`case InterfaceA: v.(InterfaceA).AA()`).
func TypeSwitch(v interface{}) {
	switch t := v.(type) {
	case InterfaceA:
		t.AA()
	case InterfaceB:
		t.BB()
	}
}

// NormalSwitch calls the method directly on the concrete type — the
// fast baseline with no interface indirection.
func NormalSwitch(a *A) { a.AA() }

// InterfaceSwitch performs a single type assertion, then calls through
// the interface.
func InterfaceSwitch(v interface{}) { v.(InterfaceA).AA() }

func Benchmark_TypeSwitch(b *testing.B) {
	var a = new(A)
	for i := 0; i < b.N; i++ {
		TypeSwitch(a)
	}
}

func Benchmark_NormalSwitch(b *testing.B) {
	var a = new(A)
	for i := 0; i < b.N; i++ {
		NormalSwitch(a)
	}
}

func Benchmark_InterfaceSwitch(b *testing.B) {
	var a = new(A)
	for i := 0; i < b.N; i++ {
		InterfaceSwitch(a)
	}
}
> go test -test.bench=".*" labs01_test.go goos: windows goarch: amd64 Benchmark_TypeSwitch-8 70671793 17.2 ns/op Benchmark_NormalSwitch-8 818507546 1.50 ns/op Benchmark_InterfaceSwitch-8 137425827 8.81 ns/op PASS ok command-line-arguments 4.880s
类型断言和类型判断的开销是普通方法调用的数倍,但实际阅读一些开源项目时,v.(InterfaceA).AA() 这种写法仍然很常见。出于什么考虑呢?我认为是在保持参数为通用接口类型的同时,让满足特定接口的传入值走 fast path(快速路径);在学习 channel 源码时也能发现,Go 的实现里大量体现了 fast path 的思想。
指针还是值参快
// BigStruct is a 240-byte value (30 × uint64) used to compare the call
// overhead of passing a pointer vs passing the struct by value.
type BigStruct struct {
	C01, C02, C03, C04, C05, C06, C07, C08, C09, C10 uint64
	C11, C12, C13, C14, C15, C16, C17, C18, C19, C20 uint64
	C21, C22, C23, C24, C25, C26, C27, C28, C29, C30 uint64
}

// Invoke1 reads the last field through a pointer argument.
func Invoke1(a *BigStruct) uint64 { return a.C30 }

// Invoke2 reads the last field from a by-value copy of the struct.
func Invoke2(a BigStruct) uint64 { return a.C30 }

func Benchmark_Invoke1(b *testing.B) {
	var a = new(BigStruct)
	for i := 0; i < b.N; i++ {
		Invoke1(a)
	}
}

func Benchmark_Invoke2(b *testing.B) {
	var a = BigStruct{}
	for i := 0; i < b.N; i++ {
		Invoke2(a)
	}
}
>go test -test.bench=".*" labs02_test.go goos: windows goarch: amd64 Benchmark_Invoke1-8 1000000000 0.276 ns/op Benchmark_Invoke2-8 1000000000 0.275 ns/op
这样看好像没区别,但是改一下
// BigStruct holds a single counter field for the mutation variant of
// the pointer-vs-value experiment.
type BigStruct struct {
	C30 uint64
}

// Invoke1 increments the caller's struct through a pointer.
func Invoke1(a *BigStruct) { a.C30++ }

// Invoke2 increments a by-value copy; the caller's struct is untouched.
// That is intentional here — the point is to measure the copy, not to
// propagate the increment.
func Invoke2(a BigStruct) { a.C30++ }

func Benchmark_Invoke1(b *testing.B) {
	var a = new(BigStruct)
	for i := 0; i < b.N; i++ {
		Invoke1(a)
	}
}

func Benchmark_Invoke2(b *testing.B) {
	var a = BigStruct{}
	for i := 0; i < b.N; i++ {
		Invoke2(a)
	}
}
goos: windows goarch: amd64 Benchmark_Invoke1-8 813119961 1.44 ns/op Benchmark_Invoke2-8 1000000000 0.274 ns/op PASS ok command-line-arguments 1.789s
其实这个问题我直接讲答案:能用结构体值就不要用指针,因为指针很容易导致内存逃逸。要详细分析还得看 Go 的内存分配策略:Go 的原则是能在栈上分配就不去堆上分配,只有发生逃逸或过大的对象才会分配到堆上,而堆分配(以及随之而来的 GC 压力)比栈分配慢很多。第一种情况里两者旗鼓相当,大概率正是因为没有发生逃逸。内存逃逸本身很好理解,po 一个讲得不错的链接:https://driverzhang.github.io/post/golang%E5%86%85%E5%AD%98%E5%88%86%E9%85%8D%E9%80%83%E9%80%B8%E5%88%86%E6%9E%90/
range和for
// BigStruct is a 240-byte value (30 × int on amd64) used to measure how
// loop style interacts with element size.
type BigStruct struct {
	C01, C02, C03, C04, C05, C06, C07, C08, C09, C10 int
	C11, C12, C13, C14, C15, C16, C17, C18, C19, C20 int
	C21, C22, C23, C24, C25, C26, C27, C28, C29, C30 int
}

// Loop1 sums C30 with a classic index loop over a slice of pointers.
func Loop1(a []*BigStruct) int {
	n := 0
	for i := 0; i < len(a); i++ {
		n += a[i].C30
	}
	return n
}

// Loop2 sums C30 with range over a slice of pointers; the per-iteration
// copy is only a pointer, so this stays cheap.
func Loop2(a []*BigStruct) int {
	n := 0
	for _, item := range a {
		n += item.C30
	}
	return n
}

// Loop3 sums C30 with an index loop over a slice of values; indexing
// avoids copying elements.
func Loop3(a []BigStruct) int {
	n := 0
	for i := 0; i < len(a); i++ {
		n += a[i].C30
	}
	return n
}

// Loop4 sums C30 with range over a slice of values; `item` is a full
// copy of each 240-byte element — this is the slow case the benchmarks
// below demonstrate.
func Loop4(a []BigStruct) int {
	n := 0
	for _, item := range a {
		n += item.C30
	}
	return n
}

func Benchmark_Loop1(b *testing.B) {
	b.StopTimer()
	a := make([]*BigStruct, 1000)
	for i := range a {
		a[i] = new(BigStruct)
	}
	b.StartTimer()
	for i := 0; i < b.N; i++ {
		Loop1(a)
	}
}

func Benchmark_Loop2(b *testing.B) {
	b.StopTimer()
	a := make([]*BigStruct, 1000)
	for i := range a {
		a[i] = new(BigStruct)
	}
	b.StartTimer()
	for i := 0; i < b.N; i++ {
		Loop2(a)
	}
}

func Benchmark_Loop3(b *testing.B) {
	b.StopTimer()
	a := make([]BigStruct, 1000)
	b.StartTimer()
	for i := 0; i < b.N; i++ {
		Loop3(a)
	}
}

func Benchmark_Loop4(b *testing.B) {
	b.StopTimer()
	a := make([]BigStruct, 1000)
	b.StartTimer()
	for i := 0; i < b.N; i++ {
		Loop4(a)
	}
}
>go test -test.bench=".*" labs03_test.go goos: windows goarch: amd64 Benchmark_Loop1-8 1516324 795 ns/op Benchmark_Loop2-8 1516320 801 ns/op Benchmark_Loop3-8 2077186 585 ns/op Benchmark_Loop4-8 107389 11334 ns/op
为什么第四种这么慢?因为 range 的第二个返回值在每次迭代时都会完整拷贝一个元素,元素越大开销越大(这里每个 BigStruct 在 amd64 上有 240 字节),所以我们需要这么改造一下。
func Loop4(a []BigStruct) int { var n = 0 for i, _ := range a { n += a[i].C30 } return n }
Benchmark_Loop1-8 1483605 843 ns/op Benchmark_Loop2-8 1452781 838 ns/op Benchmark_Loop3-8 2052632 586 ns/op Benchmark_Loop4-8 1934811 583 ns/op
效果很明显
整数和浮点数运算
// Package-level sinks: storing each loop's result prevents the compiler
// from eliminating the arithmetic as dead code. Without them every one
// of these benchmarks reports ~0.28 ns/op — less than one clock cycle —
// meaning the loop body was optimized away and nothing was measured.
var (
	sinkInt     int
	sinkInt8    int8
	sinkInt16   int16
	sinkInt32   int32
	sinkInt64   int64
	sinkFloat32 float32
	sinkFloat64 float64
)

func Benchmark_IntAdd(b *testing.B) {
	var a = 0
	for i := 0; i < b.N; i++ {
		a += 1
	}
	sinkInt = a
}

func Benchmark_Int8Add(b *testing.B) {
	var a int8 = 0
	for i := 0; i < b.N; i++ {
		a += 1
	}
	sinkInt8 = a
}

func Benchmark_Int16Add(b *testing.B) {
	// FIX: the original declared `var a int8` here, so int16 addition
	// was never actually benchmarked.
	var a int16 = 0
	for i := 0; i < b.N; i++ {
		a += 1
	}
	sinkInt16 = a
}

func Benchmark_Int32Add(b *testing.B) {
	var a int32 = 0
	for i := 0; i < b.N; i++ {
		a += 1
	}
	sinkInt32 = a
}

func Benchmark_Int64Add(b *testing.B) {
	var a int64 = 0
	for i := 0; i < b.N; i++ {
		a += 1
	}
	sinkInt64 = a
}

func Benchmark_Float32Add(b *testing.B) {
	var a float32 = 0.1
	for i := 0; i < b.N; i++ {
		a += 1.0
	}
	sinkFloat32 = a
}

func Benchmark_Float64Add(b *testing.B) {
	var a float64 = 0.1
	for i := 0; i < b.N; i++ {
		a += 1.0
	}
	sinkFloat64 = a
}

func Benchmark_IntSub(b *testing.B) {
	var a = 0x7FFFFFFFFF
	for i := 0; i < b.N; i++ {
		a -= 1
	}
	sinkInt = a
}

func Benchmark_Int8Sub(b *testing.B) {
	var a int8 = 0x7F
	for i := 0; i < b.N; i++ {
		a -= 1
	}
	sinkInt8 = a
}

func Benchmark_Int16Sub(b *testing.B) {
	var a int16 = 0x7FFF
	for i := 0; i < b.N; i++ {
		a -= 1
	}
	sinkInt16 = a
}

func Benchmark_Int32Sub(b *testing.B) {
	var a int32 = 0x7FFFFFFF
	for i := 0; i < b.N; i++ {
		a -= 1
	}
	sinkInt32 = a
}

func Benchmark_Int64Sub(b *testing.B) {
	var a int64 = 0x7FFFFFFFFF
	for i := 0; i < b.N; i++ {
		a -= 1
	}
	sinkInt64 = a
}

func Benchmark_Float32Sub(b *testing.B) {
	var a = float32(0x7FFFFFFF)
	for i := 0; i < b.N; i++ {
		a -= 1.0
	}
	sinkFloat32 = a
}

func Benchmark_Float64Sub(b *testing.B) {
	var a = float64(0xFFFFFFFFFF)
	for i := 0; i < b.N; i++ {
		a -= 1.0
	}
	sinkFloat64 = a
}

func Benchmark_IntMul(b *testing.B) {
	var a = 1
	for i := 0; i < b.N; i++ {
		a *= 3
	}
	sinkInt = a
}

func Benchmark_Int8Mul(b *testing.B) {
	var a int8 = 1
	for i := 0; i < b.N; i++ {
		a *= 3
	}
	sinkInt8 = a
}

func Benchmark_Int16Mul(b *testing.B) {
	var a int16 = 1
	for i := 0; i < b.N; i++ {
		a *= 3
	}
	sinkInt16 = a
}

func Benchmark_Int32Mul(b *testing.B) {
	var a int32 = 1
	for i := 0; i < b.N; i++ {
		a *= 3
	}
	sinkInt32 = a
}

func Benchmark_Int64Mul(b *testing.B) {
	var a int64 = 1
	for i := 0; i < b.N; i++ {
		a *= 3
	}
	sinkInt64 = a
}

func Benchmark_Float32Mul(b *testing.B) {
	var a float32 = 1.0
	for i := 0; i < b.N; i++ {
		a *= 1.5
	}
	sinkFloat32 = a
}

func Benchmark_Float64Mul(b *testing.B) {
	var a float64 = 1.0
	for i := 0; i < b.N; i++ {
		a *= 1.5
	}
	sinkFloat64 = a
}

func Benchmark_IntDiv(b *testing.B) {
	var a = 0x7FFFFFFFFF
	for i := 0; i < b.N; i++ {
		a /= 3
	}
	sinkInt = a
}

func Benchmark_Int8Div(b *testing.B) {
	var a int8 = 0x7F
	for i := 0; i < b.N; i++ {
		a /= 3
	}
	sinkInt8 = a
}

func Benchmark_Int16Div(b *testing.B) {
	var a int16 = 0x7FFF
	for i := 0; i < b.N; i++ {
		a /= 3
	}
	sinkInt16 = a
}

func Benchmark_Int32Div(b *testing.B) {
	var a int32 = 0x7FFFFFFF
	for i := 0; i < b.N; i++ {
		a /= 3
	}
	sinkInt32 = a
}

func Benchmark_Int64Div(b *testing.B) {
	var a int64 = 0x7FFFFFFFFF
	for i := 0; i < b.N; i++ {
		a /= 3
	}
	sinkInt64 = a
}

func Benchmark_Float32Div(b *testing.B) {
	var a = float32(0x7FFFFFFF)
	for i := 0; i < b.N; i++ {
		a /= 1.5
	}
	sinkFloat32 = a
}

func Benchmark_Float64Div(b *testing.B) {
	var a = float64(0x7FFFFFFFFF)
	for i := 0; i < b.N; i++ {
		a /= 1.5
	}
	sinkFloat64 = a
}
>go test -test.bench=".*" labs03_test.go goos: windows goarch: amd64 Benchmark_IntAdd-8 1000000000 0.274 ns/op Benchmark_Int8Add-8 1000000000 0.280 ns/op Benchmark_Int16Add-8 1000000000 0.280 ns/op Benchmark_Int32Add-8 1000000000 0.281 ns/op Benchmark_Int64Add-8 1000000000 0.279 ns/op Benchmark_Float32Add-8 1000000000 0.280 ns/op Benchmark_Float64Add-8 1000000000 0.277 ns/op Benchmark_IntSub-8 1000000000 0.280 ns/op Benchmark_Int8Sub-8 1000000000 0.280 ns/op Benchmark_Int16Sub-8 1000000000 0.280 ns/op Benchmark_Int32Sub-8 1000000000 0.284 ns/op Benchmark_Int64Sub-8 1000000000 0.285 ns/op Benchmark_Float32Sub-8 1000000000 0.290 ns/op Benchmark_Float64Sub-8 1000000000 0.281 ns/op Benchmark_IntMul-8 1000000000 0.291 ns/op Benchmark_Int8Mul-8 1000000000 0.284 ns/op Benchmark_Int16Mul-8 1000000000 0.285 ns/op Benchmark_Int32Mul-8 1000000000 0.290 ns/op Benchmark_Int64Mul-8 1000000000 0.281 ns/op Benchmark_Float32Mul-8 1000000000 0.282 ns/op Benchmark_Float64Mul-8 1000000000 0.285 ns/op Benchmark_IntDiv-8 1000000000 0.285 ns/op Benchmark_Int8Div-8 1000000000 0.279 ns/op Benchmark_Int16Div-8 1000000000 0.277 ns/op Benchmark_Int32Div-8 1000000000 0.283 ns/op Benchmark_Int64Div-8 1000000000 0.279 ns/op Benchmark_Float32Div-8 1000000000 0.282 ns/op Benchmark_Float64Div-8 1000000000 0.284 ns/op
所有结果都在 0.28 ns/op 左右(不到一个时钟周期),这更可能说明循环体被编译器当作死代码优化掉了,而不是各种运算真的一样快——理论上浮点除法应该明显慢于整数加减法。要测出真实数据,应把每次运算的结果写入包级变量,阻止编译器做死代码消除。
map还是slice
// BigStruct is a 240-byte value (30 × int on amd64) used to compare
// small-collection lookup strategies: linear scan, map lookup, pointer
// slice, and an unrolled switch.
type BigStruct struct {
	C01, C02, C03, C04, C05, C06, C07, C08, C09, C10 int
	C11, C12, C13, C14, C15, C16, C17, C18, C19, C20 int
	C21, C22, C23, C24, C25, C26, C27, C28, C29, C30 int
}

// Loop1 scans forward and returns the index of the first element with
// C30 == 3, or -1 if none matches.
func Loop1(a []BigStruct) int {
	for i := 0; i < len(a); i++ {
		if a[i].C30 == 3 {
			return i
		}
	}
	return -1
}

// Loop2 scans backward and returns the index of the first element (from
// the end) with C30 == 1, or -1 if none matches.
func Loop2(a []BigStruct) int {
	for i := len(a) - 1; i >= 0; i-- {
		if a[i].C30 == 1 {
			return i
		}
	}
	return -1
}

// Loop3 performs a single map lookup (missing keys yield the zero
// value, so the result is 0 when key 2 is absent).
func Loop3(a map[int]BigStruct) int {
	return a[2].C30
}

// Loop4 ranges over a pointer slice looking for C30 == 3; the range
// copy is only a pointer, so element size does not matter here.
func Loop4(a []*BigStruct) int {
	for i, x := range a {
		if x.C30 == 3 {
			return i
		}
	}
	return -1
}

// Loop5 is the three-element search unrolled into a switch.
// FIX: the original compared C01, while every benchmark populates C30 —
// it could never match, compiled down to a constant `return -1`, and
// that is why it reported an implausible ~0.28 ns/op.
func Loop5(a []BigStruct) int {
	switch {
	case a[0].C30 == 3:
		return 0
	case a[1].C30 == 3:
		return 1
	case a[2].C30 == 3:
		return 2
	}
	return -1
}

func Benchmark_Loop1(b *testing.B) {
	var a = make([]BigStruct, 3)
	a[0].C30 = 1
	a[1].C30 = 2
	a[2].C30 = 3
	for i := 0; i < b.N; i++ {
		Loop1(a)
	}
}

func Benchmark_Loop2(b *testing.B) {
	var a = make([]BigStruct, 3)
	a[0].C30 = 1
	a[1].C30 = 2
	a[2].C30 = 3
	for i := 0; i < b.N; i++ {
		Loop2(a)
	}
}

func Benchmark_Loop3(b *testing.B) {
	var a = make(map[int]BigStruct, 3)
	a[0] = BigStruct{C30: 1}
	a[1] = BigStruct{C30: 2}
	a[2] = BigStruct{C30: 3}
	for i := 0; i < b.N; i++ {
		Loop3(a)
	}
}

func Benchmark_Loop4(b *testing.B) {
	var a = make([]*BigStruct, 3)
	a[0] = &BigStruct{C30: 1}
	a[1] = &BigStruct{C30: 2}
	a[2] = &BigStruct{C30: 3}
	for i := 0; i < b.N; i++ {
		Loop4(a)
	}
}

func Benchmark_Loop5(b *testing.B) {
	var a = make([]BigStruct, 3)
	a[0].C30 = 1
	a[1].C30 = 2
	a[2].C30 = 3
	for i := 0; i < b.N; i++ {
		Loop5(a)
	}
}
Benchmark_Loop1-8 333304538 3.60 ns/op Benchmark_Loop2-8 272679490 4.36 ns/op Benchmark_Loop3-8 99990834 11.2 ns/op Benchmark_Loop4-8 359211422 3.41 ns/op Benchmark_Loop5-8 1000000000 0.282 ns/op
可以看出在这个量级(只有几个元素)用 map 明显不合适——哈希计算和查桶的固定开销远高于顺序扫描。当然这里并不是正规的测试,但写代码时应该对 map 的优缺点有基本认识。其余几种方式差距不大,只是切记:不要对大结构体切片使用按值拷贝的 range item,改用下标或指针切片。
也没内容,测着玩。