Source file src/internal/pkgbits/encoder.go

     1  // Copyright 2021 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package pkgbits
     6  
     7  import (
     8  	"bytes"
     9  	"crypto/md5"
    10  	"encoding/binary"
    11  	"go/constant"
    12  	"io"
    13  	"math/big"
    14  	"runtime"
    15  	"strings"
    16  )
    17  
// currentVersion is the current version number of the export data
// format. DumpTo writes it as the first uint32 word of its output.
//
//   - v0: initial prototype
//
//   - v1: adds the flags uint32 word
//
// TODO(mdempsky): For the next version bump:
//   - remove the legacy "has init" bool from the public root
//   - remove obj's "derived func instance" bool
const currentVersion uint32 = 1
    28  
// A PkgEncoder provides methods for encoding a package's Unified IR
// export data.
//
// Values should be created with NewPkgEncoder, which allocates the
// stringsIdx map that StringIdx writes to.
type PkgEncoder struct {
	// elems holds the bitstream for previously encoded elements,
	// indexed first by section (RelocKind) and then by the element's
	// Index within that section.
	elems [numRelocs][]string

	// stringsIdx maps previously encoded strings to their index within
	// the RelocString section, to allow deduplication. That is,
	// elems[RelocString][stringsIdx[s]] == s (if present).
	stringsIdx map[string]Index

	// syncFrames is the number of frames to write at each sync
	// marker. A negative value means sync markers are omitted.
	syncFrames int
}
    44  
    45  // SyncMarkers reports whether pw uses sync markers.
    46  func (pw *PkgEncoder) SyncMarkers() bool { return pw.syncFrames >= 0 }
    47  
    48  // NewPkgEncoder returns an initialized PkgEncoder.
    49  //
    50  // syncFrames is the number of caller frames that should be serialized
    51  // at Sync points. Serializing additional frames results in larger
    52  // export data files, but can help diagnosing desync errors in
    53  // higher-level Unified IR reader/writer code. If syncFrames is
    54  // negative, then sync markers are omitted entirely.
    55  func NewPkgEncoder(syncFrames int) PkgEncoder {
    56  	return PkgEncoder{
    57  		stringsIdx: make(map[string]Index),
    58  		syncFrames: syncFrames,
    59  	}
    60  }
    61  
    62  // DumpTo writes the package's encoded data to out0 and returns the
    63  // package fingerprint.
    64  func (pw *PkgEncoder) DumpTo(out0 io.Writer) (fingerprint [8]byte) {
    65  	h := md5.New()
    66  	out := io.MultiWriter(out0, h)
    67  
    68  	writeUint32 := func(x uint32) {
    69  		assert(binary.Write(out, binary.LittleEndian, x) == nil)
    70  	}
    71  
    72  	writeUint32(currentVersion)
    73  
    74  	var flags uint32
    75  	if pw.SyncMarkers() {
    76  		flags |= flagSyncMarkers
    77  	}
    78  	writeUint32(flags)
    79  
    80  	// Write elemEndsEnds.
    81  	var sum uint32
    82  	for _, elems := range &pw.elems {
    83  		sum += uint32(len(elems))
    84  		writeUint32(sum)
    85  	}
    86  
    87  	// Write elemEnds.
    88  	sum = 0
    89  	for _, elems := range &pw.elems {
    90  		for _, elem := range elems {
    91  			sum += uint32(len(elem))
    92  			writeUint32(sum)
    93  		}
    94  	}
    95  
    96  	// Write elemData.
    97  	for _, elems := range &pw.elems {
    98  		for _, elem := range elems {
    99  			_, err := io.WriteString(out, elem)
   100  			assert(err == nil)
   101  		}
   102  	}
   103  
   104  	// Write fingerprint.
   105  	copy(fingerprint[:], h.Sum(nil))
   106  	_, err := out0.Write(fingerprint[:])
   107  	assert(err == nil)
   108  
   109  	return
   110  }
   111  
   112  // StringIdx adds a string value to the strings section, if not
   113  // already present, and returns its index.
   114  func (pw *PkgEncoder) StringIdx(s string) Index {
   115  	if idx, ok := pw.stringsIdx[s]; ok {
   116  		assert(pw.elems[RelocString][idx] == s)
   117  		return idx
   118  	}
   119  
   120  	idx := Index(len(pw.elems[RelocString]))
   121  	pw.elems[RelocString] = append(pw.elems[RelocString], s)
   122  	pw.stringsIdx[s] = idx
   123  	return idx
   124  }
   125  
   126  // NewEncoder returns an Encoder for a new element within the given
   127  // section, and encodes the given SyncMarker as the start of the
   128  // element bitstream.
   129  func (pw *PkgEncoder) NewEncoder(k RelocKind, marker SyncMarker) Encoder {
   130  	e := pw.NewEncoderRaw(k)
   131  	e.Sync(marker)
   132  	return e
   133  }
   134  
   135  // NewEncoderRaw returns an Encoder for a new element within the given
   136  // section.
   137  //
   138  // Most callers should use NewEncoder instead.
   139  func (pw *PkgEncoder) NewEncoderRaw(k RelocKind) Encoder {
   140  	idx := Index(len(pw.elems[k]))
   141  	pw.elems[k] = append(pw.elems[k], "") // placeholder
   142  
   143  	return Encoder{
   144  		p:   pw,
   145  		k:   k,
   146  		Idx: idx,
   147  	}
   148  }
   149  
// An Encoder provides methods for encoding an individual element's
// bitstream data.
//
// Encoders are created by PkgEncoder.NewEncoder or NewEncoderRaw, and
// their contents are stored back into the PkgEncoder by Flush.
type Encoder struct {
	// p is the PkgEncoder that this element belongs to.
	p *PkgEncoder

	// Relocs lists the (section, index) pairs referenced by this
	// element, in order of first use. RelocMap maps each entry to its
	// position in Relocs for deduplication; rawReloc allocates it on
	// first use.
	Relocs   []RelocEnt
	RelocMap map[RelocEnt]uint32
	Data     bytes.Buffer // accumulated element bitstream data

	// encodingRelocHeader is set by Flush before it writes the
	// relocation header; while set, Sync omits stack frames.
	encodingRelocHeader bool

	k   RelocKind // section that this element is stored in
	Idx Index // index within relocation section
}
   164  
   165  // Flush finalizes the element's bitstream and returns its Index.
   166  func (w *Encoder) Flush() Index {
   167  	var sb strings.Builder
   168  
   169  	// Backup the data so we write the relocations at the front.
   170  	var tmp bytes.Buffer
   171  	io.Copy(&tmp, &w.Data)
   172  
   173  	// TODO(mdempsky): Consider writing these out separately so they're
   174  	// easier to strip, along with function bodies, so that we can prune
   175  	// down to just the data that's relevant to go/types.
   176  	if w.encodingRelocHeader {
   177  		panic("encodingRelocHeader already true; recursive flush?")
   178  	}
   179  	w.encodingRelocHeader = true
   180  	w.Sync(SyncRelocs)
   181  	w.Len(len(w.Relocs))
   182  	for _, rEnt := range w.Relocs {
   183  		w.Sync(SyncReloc)
   184  		w.Len(int(rEnt.Kind))
   185  		w.Len(int(rEnt.Idx))
   186  	}
   187  
   188  	io.Copy(&sb, &w.Data)
   189  	io.Copy(&sb, &tmp)
   190  	w.p.elems[w.k][w.Idx] = sb.String()
   191  
   192  	return w.Idx
   193  }
   194  
   195  func (w *Encoder) checkErr(err error) {
   196  	if err != nil {
   197  		errorf("unexpected encoding error: %v", err)
   198  	}
   199  }
   200  
   201  func (w *Encoder) rawUvarint(x uint64) {
   202  	var buf [binary.MaxVarintLen64]byte
   203  	n := binary.PutUvarint(buf[:], x)
   204  	_, err := w.Data.Write(buf[:n])
   205  	w.checkErr(err)
   206  }
   207  
   208  func (w *Encoder) rawVarint(x int64) {
   209  	// Zig-zag encode.
   210  	ux := uint64(x) << 1
   211  	if x < 0 {
   212  		ux = ^ux
   213  	}
   214  
   215  	w.rawUvarint(ux)
   216  }
   217  
   218  func (w *Encoder) rawReloc(r RelocKind, idx Index) int {
   219  	e := RelocEnt{r, idx}
   220  	if w.RelocMap != nil {
   221  		if i, ok := w.RelocMap[e]; ok {
   222  			return int(i)
   223  		}
   224  	} else {
   225  		w.RelocMap = make(map[RelocEnt]uint32)
   226  	}
   227  
   228  	i := len(w.Relocs)
   229  	w.RelocMap[e] = uint32(i)
   230  	w.Relocs = append(w.Relocs, e)
   231  	return i
   232  }
   233  
   234  func (w *Encoder) Sync(m SyncMarker) {
   235  	if !w.p.SyncMarkers() {
   236  		return
   237  	}
   238  
   239  	// Writing out stack frame string references requires working
   240  	// relocations, but writing out the relocations themselves involves
   241  	// sync markers. To prevent infinite recursion, we simply trim the
   242  	// stack frame for sync markers within the relocation header.
   243  	var frames []string
   244  	if !w.encodingRelocHeader && w.p.syncFrames > 0 {
   245  		pcs := make([]uintptr, w.p.syncFrames)
   246  		n := runtime.Callers(2, pcs)
   247  		frames = fmtFrames(pcs[:n]...)
   248  	}
   249  
   250  	// TODO(mdempsky): Save space by writing out stack frames as a
   251  	// linked list so we can share common stack frames.
   252  	w.rawUvarint(uint64(m))
   253  	w.rawUvarint(uint64(len(frames)))
   254  	for _, frame := range frames {
   255  		w.rawUvarint(uint64(w.rawReloc(RelocString, w.p.StringIdx(frame))))
   256  	}
   257  }
   258  
   259  // Bool encodes and writes a bool value into the element bitstream,
   260  // and then returns the bool value.
   261  //
   262  // For simple, 2-alternative encodings, the idiomatic way to call Bool
   263  // is something like:
   264  //
   265  //	if w.Bool(x != 0) {
   266  //		// alternative #1
   267  //	} else {
   268  //		// alternative #2
   269  //	}
   270  //
   271  // For multi-alternative encodings, use Code instead.
   272  func (w *Encoder) Bool(b bool) bool {
   273  	w.Sync(SyncBool)
   274  	var x byte
   275  	if b {
   276  		x = 1
   277  	}
   278  	err := w.Data.WriteByte(x)
   279  	w.checkErr(err)
   280  	return b
   281  }
   282  
// Int64 encodes and writes an int64 value into the element bitstream.
//
// The value is preceded by a SyncInt64 marker (when sync markers are
// enabled) and is written as a zig-zag encoded varint.
func (w *Encoder) Int64(x int64) {
	w.Sync(SyncInt64)
	w.rawVarint(x)
}
   288  
// Uint64 encodes and writes a uint64 value into the element bitstream.
//
// The value is preceded by a SyncUint64 marker (when sync markers are
// enabled) and is written as an unsigned varint.
func (w *Encoder) Uint64(x uint64) {
	w.Sync(SyncUint64)
	w.rawUvarint(x)
}
   294  
   295  // Len encodes and writes a non-negative int value into the element bitstream.
   296  func (w *Encoder) Len(x int) { assert(x >= 0); w.Uint64(uint64(x)) }
   297  
   298  // Int encodes and writes an int value into the element bitstream.
   299  func (w *Encoder) Int(x int) { w.Int64(int64(x)) }
   300  
   301  // Len encodes and writes a uint value into the element bitstream.
   302  func (w *Encoder) Uint(x uint) { w.Uint64(uint64(x)) }
   303  
   304  // Reloc encodes and writes a relocation for the given (section,
   305  // index) pair into the element bitstream.
   306  //
   307  // Note: Only the index is formally written into the element
   308  // bitstream, so bitstream decoders must know from context which
   309  // section an encoded relocation refers to.
   310  func (w *Encoder) Reloc(r RelocKind, idx Index) {
   311  	w.Sync(SyncUseReloc)
   312  	w.Len(w.rawReloc(r, idx))
   313  }
   314  
   315  // Code encodes and writes a Code value into the element bitstream.
   316  func (w *Encoder) Code(c Code) {
   317  	w.Sync(c.Marker())
   318  	w.Len(c.Value())
   319  }
   320  
   321  // String encodes and writes a string value into the element
   322  // bitstream.
   323  //
   324  // Internally, strings are deduplicated by adding them to the strings
   325  // section (if not already present), and then writing a relocation
   326  // into the element bitstream.
   327  func (w *Encoder) String(s string) {
   328  	w.StringRef(w.p.StringIdx(s))
   329  }
   330  
// StringRef writes a reference to the given index, which must be a
// previously encoded string value.
//
// The reference is written as a SyncString marker followed by a
// relocation into the RelocString section.
func (w *Encoder) StringRef(idx Index) {
	w.Sync(SyncString)
	w.Reloc(RelocString, idx)
}
   337  
   338  // Strings encodes and writes a variable-length slice of strings into
   339  // the element bitstream.
   340  func (w *Encoder) Strings(ss []string) {
   341  	w.Len(len(ss))
   342  	for _, s := range ss {
   343  		w.String(s)
   344  	}
   345  }
   346  
   347  // Value encodes and writes a constant.Value into the element
   348  // bitstream.
   349  func (w *Encoder) Value(val constant.Value) {
   350  	w.Sync(SyncValue)
   351  	if w.Bool(val.Kind() == constant.Complex) {
   352  		w.scalar(constant.Real(val))
   353  		w.scalar(constant.Imag(val))
   354  	} else {
   355  		w.scalar(val)
   356  	}
   357  }
   358  
   359  func (w *Encoder) scalar(val constant.Value) {
   360  	switch v := constant.Val(val).(type) {
   361  	default:
   362  		errorf("unhandled %v (%v)", val, val.Kind())
   363  	case bool:
   364  		w.Code(ValBool)
   365  		w.Bool(v)
   366  	case string:
   367  		w.Code(ValString)
   368  		w.String(v)
   369  	case int64:
   370  		w.Code(ValInt64)
   371  		w.Int64(v)
   372  	case *big.Int:
   373  		w.Code(ValBigInt)
   374  		w.bigInt(v)
   375  	case *big.Rat:
   376  		w.Code(ValBigRat)
   377  		w.bigInt(v.Num())
   378  		w.bigInt(v.Denom())
   379  	case *big.Float:
   380  		w.Code(ValBigFloat)
   381  		w.bigFloat(v)
   382  	}
   383  }
   384  
   385  func (w *Encoder) bigInt(v *big.Int) {
   386  	b := v.Bytes()
   387  	w.String(string(b)) // TODO: More efficient encoding.
   388  	w.Bool(v.Sign() < 0)
   389  }
   390  
   391  func (w *Encoder) bigFloat(v *big.Float) {
   392  	b := v.Append(nil, 'p', -1)
   393  	w.String(string(b)) // TODO: More efficient encoding.
   394  }
   395  

View as plain text