gcc/libgo/go/golang.org/x/mod/sumdb/client.go

// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package sumdb

import (
	"bytes"
	"errors"
	"fmt"
	"path"
	"strings"
	"sync"
	"sync/atomic"

	"golang.org/x/mod/module"
	"golang.org/x/mod/sumdb/note"
	"golang.org/x/mod/sumdb/tlog"
)

// A ClientOps provides the external operations
// (file caching, HTTP fetches, and so on) needed by the Client.
// The methods must be safe for concurrent use by multiple goroutines.
type ClientOps interface {
	// ReadRemote reads and returns the content served at the given path
	// on the remote database server. The path begins with "/lookup" or "/tile/",
	// and there is no need to parse the path in any way.
	// It is the implementation's responsibility to turn that path into a full URL
	// and make the HTTP request. ReadRemote should return an error for
	// any non-200 HTTP response status.
	ReadRemote(path string) ([]byte, error)

	// ReadConfig reads and returns the content of the named configuration file.
	// There are only a fixed set of configuration files.
	//
	// "key" returns a file containing the verifier key for the server.
	//
	// serverName + "/latest" returns a file containing the latest known
	// signed tree from the server.
	// To signal that the client wishes to start with an "empty" signed tree,
	// ReadConfig can return a successful empty result (0 bytes of data).
	ReadConfig(file string) ([]byte, error)

	// WriteConfig updates the content of the named configuration file,
	// changing it from the old []byte to the new []byte.
	// If the old []byte does not match the stored configuration,
	// WriteConfig must return ErrWriteConflict.
	// Otherwise, WriteConfig should atomically replace old with new.
	// The "key" configuration file is never written using WriteConfig.
	WriteConfig(file string, old, new []byte) error

	// ReadCache reads and returns the content of the named cache file.
	// Any returned error will be treated as equivalent to the file not existing.
	// There can be arbitrarily many cache files, such as:
	//	serverName/lookup/pkg@version
	//	serverName/tile/8/1/x123/456
	ReadCache(file string) ([]byte, error)

	// WriteCache writes the named cache file.
	WriteCache(file string, data []byte)

	// Log prints the given log message (such as with log.Print)
	Log(msg string)

	// SecurityError prints the given security error log message.
	// The Client returns ErrSecurity from any operation that invokes SecurityError,
	// but the return value is mainly for testing. In a real program,
	// SecurityError should typically print the message and call log.Fatal or os.Exit.
	SecurityError(msg string)
}

// ErrWriteConflict signals a write conflict during Client.WriteConfig.
var ErrWriteConflict = errors.New("write conflict")

// ErrSecurity is returned by Client operations that invoke Client.SecurityError.
var ErrSecurity = errors.New("security error: misbehaving server")

// A Client is a client connection to a checksum database.
// All the methods are safe for simultaneous use by multiple goroutines.
type Client struct {
	ops ClientOps // access to operations in the external world

	didLookup uint32

	// one-time initialized data
	initOnce   sync.Once
	initErr    error          // init error, if any
	name       string         // name of accepted verifier
	verifiers  note.Verifiers // accepted verifiers (just one, but Verifiers for note.Open)
	tileReader tileReader
	tileHeight int
	nosumdb    string

	record    parCache // cache of record lookup, keyed by path@vers
	tileCache parCache // cache of c.readTile, keyed by tile

	latestMu  sync.Mutex
	latest    tlog.Tree // latest known tree head
	latestMsg []byte    // encoded signed note for latest

	tileSavedMu sync.Mutex
	tileSaved   map[tlog.Tile]bool // which tiles have been saved using c.ops.WriteCache already
}

// NewClient returns a new Client using the given Client.
func NewClient(ops ClientOps) *Client {
	return &Client{
		ops: ops,
	}
}

// init initiailzes the client (if not already initialized)
// and returns any initialization error.
func (c *Client) init() error {
	c.initOnce.Do(c.initWork)
	return c.initErr
}

// initWork does the actual initialization work.
func (c *Client) initWork() {
	defer func() {
		if c.initErr != nil {
			c.initErr = fmt.Errorf("initializing sumdb.Client: %v", c.initErr)
		}
	}()

	c.tileReader.c = c
	if c.tileHeight == 0 {
		c.tileHeight = 8
	}
	c.tileSaved = make(map[tlog.Tile]bool)

	vkey, err := c.ops.ReadConfig("key")
	if err != nil {
		c.initErr = err
		return
	}
	verifier, err := note.NewVerifier(strings.TrimSpace(string(vkey)))
	if err != nil {
		c.initErr = err
		return
	}
	c.verifiers = note.VerifierList(verifier)
	c.name = verifier.Name()

	data, err := c.ops.ReadConfig(c.name + "/latest")
	if err != nil {
		c.initErr = err
		return
	}
	if err := c.mergeLatest(data); err != nil {
		c.initErr = err
		return
	}
}

// SetTileHeight sets the tile height for the Client.
// Any call to SetTileHeight must happen before the first call to Lookup.
// If SetTileHeight is not called, the Client defaults to tile height 8.
// SetTileHeight can be called at most once,
// and if so it must be called before the first call to Lookup.
func (c *Client) SetTileHeight(height int) {
	if atomic.LoadUint32(&c.didLookup) != 0 {
		panic("SetTileHeight used after Lookup")
	}
	if height <= 0 {
		panic("invalid call to SetTileHeight")
	}
	if c.tileHeight != 0 {
		panic("multiple calls to SetTileHeight")
	}
	c.tileHeight = height
}

// SetGONOSUMDB sets the list of comma-separated GONOSUMDB patterns for the Client.
// For any module path matching one of the patterns,
// Lookup will return ErrGONOSUMDB.
// SetGONOSUMDB can be called at most once,
// and if so it must be called before the first call to Lookup.
func (c *Client) SetGONOSUMDB(list string) {
	if atomic.LoadUint32(&c.didLookup) != 0 {
		panic("SetGONOSUMDB used after Lookup")
	}
	if c.nosumdb != "" {
		panic("multiple calls to SetGONOSUMDB")
	}
	c.nosumdb = list
}

// ErrGONOSUMDB is returned by Lookup for paths that match
// a pattern listed in the GONOSUMDB list (set by SetGONOSUMDB,
// usually from the environment variable).
var ErrGONOSUMDB = errors.New("skipped (listed in GONOSUMDB)")

func (c *Client) skip(target string) bool {
	return globsMatchPath(c.nosumdb, target)
}

// globsMatchPath reports whether any path prefix of target
// matches one of the glob patterns (as defined by path.Match)
// in the comma-separated globs list.
// It ignores any empty or malformed patterns in the list.
func globsMatchPath(globs, target string) bool {
	for globs != "" {
		// Extract next non-empty glob in comma-separated list.
		var glob string
		if i := strings.Index(globs, ","); i >= 0 {
			glob, globs = globs[:i], globs[i+1:]
		} else {
			glob, globs = globs, ""
		}
		if glob == "" {
			continue
		}

		// A glob with N+1 path elements (N slashes) needs to be matched
		// against the first N+1 path elements of target,
		// which end just before the N+1'th slash.
		n := strings.Count(glob, "/")
		prefix := target
		// Walk target, counting slashes, truncating at the N+1'th slash.
		for i := 0; i < len(target); i++ {
			if target[i] == '/' {
				if n == 0 {
					prefix = target[:i]
					break
				}
				n--
			}
		}
		if n > 0 {
			// Not enough prefix elements.
			continue
		}
		matched, _ := path.Match(glob, prefix)
		if matched {
			return true
		}
	}
	return false
}

// Lookup returns the go.sum lines for the given module path and version.
// The version may end in a /go.mod suffix, in which case Lookup returns
// the go.sum lines for the module's go.mod-only hash.
func (c *Client) Lookup(path, vers string) (lines []string, err error) {
	atomic.StoreUint32(&c.didLookup, 1)

	if c.skip(path) {
		return nil, ErrGONOSUMDB
	}

	defer func() {
		if err != nil {
			err = fmt.Errorf("%s@%s: %v", path, vers, err)
		}
	}()

	if err := c.init(); err != nil {
		return nil, err
	}

	// Prepare encoded cache filename / URL.
	epath, err := module.EscapePath(path)
	if err != nil {
		return nil, err
	}
	evers, err := module.EscapeVersion(strings.TrimSuffix(vers, "/go.mod"))
	if err != nil {
		return nil, err
	}
	remotePath := "/lookup/" + epath + "@" + evers
	file := c.name + remotePath

	// Fetch the data.
	// The lookupCache avoids redundant ReadCache/GetURL operations
	// (especially since go.sum lines tend to come in pairs for a given
	// path and version) and also avoids having multiple of the same
	// request in flight at once.
	type cached struct {
		data []byte
		err  error
	}
	result := c.record.Do(file, func() interface{} {
		// Try the on-disk cache, or else get from web.
		writeCache := false
		data, err := c.ops.ReadCache(file)
		if err != nil {
			data, err = c.ops.ReadRemote(remotePath)
			if err != nil {
				return cached{nil, err}
			}
			writeCache = true
		}

		// Validate the record before using it for anything.
		id, text, treeMsg, err := tlog.ParseRecord(data)
		if err != nil {
			return cached{nil, err}
		}
		if err := c.mergeLatest(treeMsg); err != nil {
			return cached{nil, err}
		}
		if err := c.checkRecord(id, text); err != nil {
			return cached{nil, err}
		}

		// Now that we've validated the record,
		// save it to the on-disk cache (unless that's where it came from).
		if writeCache {
			c.ops.WriteCache(file, data)
		}

		return cached{data, nil}
	}).(cached)
	if result.err != nil {
		return nil, result.err
	}

	// Extract the lines for the specific version we want
	// (with or without /go.mod).
	prefix := path + " " + vers + " "
	var hashes []string
	for _, line := range strings.Split(string(result.data), "\n") {
		if strings.HasPrefix(line, prefix) {
			hashes = append(hashes, line)
		}
	}
	return hashes, nil
}

// mergeLatest merges the tree head in msg
// with the Client's current latest tree head,
// ensuring the result is a consistent timeline.
// If the result is inconsistent, mergeLatest calls c.ops.SecurityError
// with a detailed security error message and then
// (only if c.ops.SecurityError does not exit the program) returns ErrSecurity.
// If the Client's current latest tree head moves forward,
// mergeLatest updates the underlying configuration file as well,
// taking care to merge any independent updates to that configuration.
func (c *Client) mergeLatest(msg []byte) error {
	// Merge msg into our in-memory copy of the latest tree head.
	when, err := c.mergeLatestMem(msg)
	if err != nil {
		return err
	}
	if when != msgFuture {
		// msg matched our present or was in the past.
		// No change to our present, so no update of config file.
		return nil
	}

	// Flush our extended timeline back out to the configuration file.
	// If the configuration file has been updated in the interim,
	// we need to merge any updates made there as well.
	// Note that writeConfig is an atomic compare-and-swap.
	for {
		msg, err := c.ops.ReadConfig(c.name + "/latest")
		if err != nil {
			return err
		}
		when, err := c.mergeLatestMem(msg)
		if err != nil {
			return err
		}
		if when != msgPast {
			// msg matched our present or was from the future,
			// and now our in-memory copy matches.
			return nil
		}

		// msg (== config) is in the past, so we need to update it.
		c.latestMu.Lock()
		latestMsg := c.latestMsg
		c.latestMu.Unlock()
		if err := c.ops.WriteConfig(c.name+"/latest", msg, latestMsg); err != ErrWriteConflict {
			// Success or a non-write-conflict error.
			return err
		}
	}
}

const (
	msgPast = 1 + iota
	msgNow
	msgFuture
)

// mergeLatestMem is like mergeLatest but is only concerned with
// updating the in-memory copy of the latest tree head (c.latest)
// not the configuration file.
// The when result explains when msg happened relative to our
// previous idea of c.latest:
// msgPast means msg was from before c.latest,
// msgNow means msg was exactly c.latest, and
// msgFuture means msg was from after c.latest, which has now been updated.
func (c *Client) mergeLatestMem(msg []byte) (when int, err error) {
	if len(msg) == 0 {
		// Accept empty msg as the unsigned, empty timeline.
		c.latestMu.Lock()
		latest := c.latest
		c.latestMu.Unlock()
		if latest.N == 0 {
			return msgNow, nil
		}
		return msgPast, nil
	}

	note, err := note.Open(msg, c.verifiers)
	if err != nil {
		return 0, fmt.Errorf("reading tree note: %v\nnote:\n%s", err, msg)
	}
	tree, err := tlog.ParseTree([]byte(note.Text))
	if err != nil {
		return 0, fmt.Errorf("reading tree: %v\ntree:\n%s", err, note.Text)
	}

	// Other lookups may be calling mergeLatest with other heads,
	// so c.latest is changing underfoot. We don't want to hold the
	// c.mu lock during tile fetches, so loop trying to update c.latest.
	c.latestMu.Lock()
	latest := c.latest
	latestMsg := c.latestMsg
	c.latestMu.Unlock()

	for {
		// If the tree head looks old, check that it is on our timeline.
		if tree.N <= latest.N {
			if err := c.checkTrees(tree, msg, latest, latestMsg); err != nil {
				return 0, err
			}
			if tree.N < latest.N {
				return msgPast, nil
			}
			return msgNow, nil
		}

		// The tree head looks new. Check that we are on its timeline and try to move our timeline forward.
		if err := c.checkTrees(latest, latestMsg, tree, msg); err != nil {
			return 0, err
		}

		// Install our msg if possible.
		// Otherwise we will go around again.
		c.latestMu.Lock()
		installed := false
		if c.latest == latest {
			installed = true
			c.latest = tree
			c.latestMsg = msg
		} else {
			latest = c.latest
			latestMsg = c.latestMsg
		}
		c.latestMu.Unlock()

		if installed {
			return msgFuture, nil
		}
	}
}

// checkTrees checks that older (from olderNote) is contained in newer (from newerNote).
// If an error occurs, such as malformed data or a network problem, checkTrees returns that error.
// If on the other hand checkTrees finds evidence of misbehavior, it prepares a detailed
// message and calls log.Fatal.
func (c *Client) checkTrees(older tlog.Tree, olderNote []byte, newer tlog.Tree, newerNote []byte) error {
	thr := tlog.TileHashReader(newer, &c.tileReader)
	h, err := tlog.TreeHash(older.N, thr)
	if err != nil {
		if older.N == newer.N {
			return fmt.Errorf("checking tree#%d: %v", older.N, err)
		}
		return fmt.Errorf("checking tree#%d against tree#%d: %v", older.N, newer.N, err)
	}
	if h == older.Hash {
		return nil
	}

	// Detected a fork in the tree timeline.
	// Start by reporting the inconsistent signed tree notes.
	var buf bytes.Buffer
	fmt.Fprintf(&buf, "SECURITY ERROR\n")
	fmt.Fprintf(&buf, "go.sum database server misbehavior detected!\n\n")
	indent := func(b []byte) []byte {
		return bytes.Replace(b, []byte("\n"), []byte("\n\t"), -1)
	}
	fmt.Fprintf(&buf, "old database:\n\t%s\n", indent(olderNote))
	fmt.Fprintf(&buf, "new database:\n\t%s\n", indent(newerNote))

	// The notes alone are not enough to prove the inconsistency.
	// We also need to show that the newer note's tree hash for older.N
	// does not match older.Hash. The consumer of this report could
	// of course consult the server to try to verify the inconsistency,
	// but we are holding all the bits we need to prove it right now,
	// so we might as well print them and make the report not depend
	// on the continued availability of the misbehaving server.
	// Preparing this data only reuses the tiled hashes needed for
	// tlog.TreeHash(older.N, thr) above, so assuming thr is caching tiles,
	// there are no new access to the server here, and these operations cannot fail.
	fmt.Fprintf(&buf, "proof of misbehavior:\n\t%v", h)
	if p, err := tlog.ProveTree(newer.N, older.N, thr); err != nil {
		fmt.Fprintf(&buf, "\tinternal error: %v\n", err)
	} else if err := tlog.CheckTree(p, newer.N, newer.Hash, older.N, h); err != nil {
		fmt.Fprintf(&buf, "\tinternal error: generated inconsistent proof\n")
	} else {
		for _, h := range p {
			fmt.Fprintf(&buf, "\n\t%v", h)
		}
	}
	c.ops.SecurityError(buf.String())
	return ErrSecurity
}

// checkRecord checks that record #id's hash matches data.
func (c *Client) checkRecord(id int64, data []byte) error {
	c.latestMu.Lock()
	latest := c.latest
	c.latestMu.Unlock()

	if id >= latest.N {
		return fmt.Errorf("cannot validate record %d in tree of size %d", id, latest.N)
	}
	hashes, err := tlog.TileHashReader(latest, &c.tileReader).ReadHashes([]int64{tlog.StoredHashIndex(0, id)})
	if err != nil {
		return err
	}
	if hashes[0] == tlog.RecordHash(data) {
		return nil
	}
	return fmt.Errorf("cannot authenticate record data in server response")
}

// tileReader is a *Client wrapper that implements tlog.TileReader.
// The separate type avoids exposing the ReadTiles and SaveTiles
// methods on Client itself.
type tileReader struct {
	c *Client
}

func (r *tileReader) Height() int {
	return r.c.tileHeight
}

// ReadTiles reads and returns the requested tiles,
// either from the on-disk cache or the server.
func (r *tileReader) ReadTiles(tiles []tlog.Tile) ([][]byte, error) {
	// Read all the tiles in parallel.
	data := make([][]byte, len(tiles))
	errs := make([]error, len(tiles))
	var wg sync.WaitGroup
	for i, tile := range tiles {
		wg.Add(1)
		go func(i int, tile tlog.Tile) {
			defer wg.Done()
			data[i], errs[i] = r.c.readTile(tile)
		}(i, tile)
	}
	wg.Wait()

	for _, err := range errs {
		if err != nil {
			return nil, err
		}
	}

	return data, nil
}

// tileCacheKey returns the cache key for the tile.
func (c *Client) tileCacheKey(tile tlog.Tile) string {
	return c.name + "/" + tile.Path()
}

// tileRemotePath returns the remote path for the tile.
func (c *Client) tileRemotePath(tile tlog.Tile) string {
	return "/" + tile.Path()
}

// readTile reads a single tile, either from the on-disk cache or the server.
func (c *Client) readTile(tile tlog.Tile) ([]byte, error) {
	type cached struct {
		data []byte
		err  error
	}

	result := c.tileCache.Do(tile, func() interface{} {
		// Try the requested tile in on-disk cache.
		data, err := c.ops.ReadCache(c.tileCacheKey(tile))
		if err == nil {
			c.markTileSaved(tile)
			return cached{data, nil}
		}

		// Try the full tile in on-disk cache (if requested tile not already full).
		// We only save authenticated tiles to the on-disk cache,
		// so the recreated prefix is equally authenticated.
		full := tile
		full.W = 1 << uint(tile.H)
		if tile != full {
			data, err := c.ops.ReadCache(c.tileCacheKey(full))
			if err == nil {
				c.markTileSaved(tile) // don't save tile later; we already have full
				return cached{data[:len(data)/full.W*tile.W], nil}
			}
		}

		// Try requested tile from server.
		data, err = c.ops.ReadRemote(c.tileRemotePath(tile))
		if err == nil {
			return cached{data, nil}
		}

		// Try full tile on server.
		// If the partial tile does not exist, it should be because
		// the tile has been completed and only the complete one
		// is available.
		if tile != full {
			data, err := c.ops.ReadRemote(c.tileRemotePath(full))
			if err == nil {
				// Note: We could save the full tile in the on-disk cache here,
				// but we don't know if it is valid yet, and we will only find out
				// about the partial data, not the full data. So let SaveTiles
				// save the partial tile, and we'll just refetch the full tile later
				// once we can validate more (or all) of it.
				return cached{data[:len(data)/full.W*tile.W], nil}
			}
		}

		// Nothing worked.
		// Return the error from the server fetch for the requested (not full) tile.
		return cached{nil, err}
	}).(cached)

	return result.data, result.err
}

// markTileSaved records that tile is already present in the on-disk cache,
// so that a future SaveTiles for that tile can be ignored.
func (c *Client) markTileSaved(tile tlog.Tile) {
	c.tileSavedMu.Lock()
	c.tileSaved[tile] = true
	c.tileSavedMu.Unlock()
}

// SaveTiles saves the now validated tiles.
func (r *tileReader) SaveTiles(tiles []tlog.Tile, data [][]byte) {
	c := r.c

	// Determine which tiles need saving.
	// (Tiles that came from the cache need not be saved back.)
	save := make([]bool, len(tiles))
	c.tileSavedMu.Lock()
	for i, tile := range tiles {
		if !c.tileSaved[tile] {
			save[i] = true
			c.tileSaved[tile] = true
		}
	}
	c.tileSavedMu.Unlock()

	for i, tile := range tiles {
		if save[i] {
			// If WriteCache fails here (out of disk space? i/o error?),
			// c.tileSaved[tile] is still true and we will not try to write it again.
			// Next time we run maybe we'll redownload it again and be
			// more successful.
			c.ops.WriteCache(c.name+"/"+tile.Path(), data[i])
		}
	}
}