From 7dbf9d43f5a62a604ab3e6ceb1ee7ac4f3a80d80 Mon Sep 17 00:00:00 2001 From: Javier Kohen Date: Tue, 20 Mar 2018 14:35:37 -0400 Subject: [PATCH] regexp: use sync.Pool to cache regexp.machine objects Performance optimization for the internals of the Regexp type. This adds no features and has no user-visible impact beyond performance. Copy now shares the cache, so memory usage for programs that use Copy a lot should go down; Copy has effectively become a no-op. The before v. after benchmark results show a lot of noise from run to run, but there's a clear improvement to the Shared case and no detriment to the Copied case. BenchmarkMatchParallelShared-4 361 77.9 -78.42% BenchmarkMatchParallelCopied-4 70.3 72.2 +2.70% Macro benchmarks show that the lock contention in Regexp is gone, and my server is now able to scale linearly 2.5x times more than before (and I only stopped there because I ran out of CPU in my test machine). Fixes #24411 Change-Id: Ib33abff2802f27599f5d09084775e95b54e3e1d7 Reviewed-on: https://go-review.googlesource.com/101715 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- src/regexp/regexp.go | 62 ++++++++++++++++++-------------------------- 1 file changed, 25 insertions(+), 37 deletions(-) diff --git a/src/regexp/regexp.go b/src/regexp/regexp.go index ae6ff23702..7a214ad156 100644 --- a/src/regexp/regexp.go +++ b/src/regexp/regexp.go @@ -79,15 +79,13 @@ import ( // A Regexp is safe for concurrent use by multiple goroutines, // except for configuration methods, such as Longest. type Regexp struct { - // read-only after Compile - regexpRO + // cache of machines for running regexp. This is a shared pointer across + // all copies of the original Regexp object to decrease the overall + // memory footprint of the regexps (since there will be one machine + // cached per thread instead of one per thread per copy). + machines *sync.Pool - // cache of machines for running regexp - mu sync.Mutex - machine []*machine -} - -type regexpRO struct { + // everything below is read-only after Compile expr string // as passed to Compile prog *syntax.Prog // compiled program onepass *onePassProg // onepass program or nil @@ -109,14 +107,10 @@ func (re *Regexp) String() string { // Copy returns a new Regexp object copied from re. // -// When using a Regexp in multiple goroutines, giving each goroutine -// its own copy helps to avoid lock contention. +// Deprecated: This exists for historical reasons. func (re *Regexp) Copy() *Regexp { - // It is not safe to copy Regexp by value - // since it contains a sync.Mutex. - return &Regexp{ - regexpRO: re.regexpRO, - } + re2 := *re + return &re2 } // Compile parses a regular expression and returns, if successful, @@ -179,15 +173,21 @@ func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) { if err != nil { return nil, err } + onepass := compileOnePass(prog) regexp := &Regexp{ - regexpRO: regexpRO{ - expr: expr, - prog: prog, - onepass: compileOnePass(prog), - numSubexp: maxCap, - subexpNames: capNames, - cond: prog.StartCond(), - longest: longest, + expr: expr, + prog: prog, + onepass: onepass, + numSubexp: maxCap, + subexpNames: capNames, + cond: prog.StartCond(), + longest: longest, + } + regexp.machines = &sync.Pool{ + New: func() interface{} { + z := progMachine(prog, onepass) + z.re = regexp + return z }, } if regexp.onepass == notOnePass { @@ -208,17 +208,7 @@ func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) { // It uses the re's machine cache if possible, to avoid // unnecessary allocation. func (re *Regexp) get() *machine { - re.mu.Lock() - if n := len(re.machine); n > 0 { - z := re.machine[n-1] - re.machine = re.machine[:n-1] - re.mu.Unlock() - return z - } - re.mu.Unlock() - z := progMachine(re.prog, re.onepass) - z.re = re - return z + return re.machines.Get().(*machine) } // put returns a machine to the re's machine cache. @@ -231,9 +221,7 @@ func (re *Regexp) put(z *machine) { z.inputString.str = "" z.inputReader.r = nil - re.mu.Lock() - re.machine = append(re.machine, z) - re.mu.Unlock() + re.machines.Put(z) } // MustCompile is like Compile but panics if the expression cannot be parsed. -- 2.50.0