dl.google.com: Powered by Go

26 July 2013

Brad Fitzpatrick

Gopher, Google

Overview / tl;dw:

2

too long...

3

me

4

I love Go

5

dl.google.com

6

dl.google.com

7

Why port?

8

reason 0

$ apt-get update

9

Yeah, embarrassing, for years...

10

... which led to:

11

How hard can this be?

12

dl.google.com: few tricks

each "payload" (~URL) described by a protobuf:

13

dl.google.com: how it was

14

Aside: Why good code goes bad

15

Why good code goes bad

16

code complexity

17

changing environment

18

so why did it suck?

19

but why?

20

Old code

21

Mitigation solution?

22

Summary of 5-year old code in 2012

23

Environment changes

24

Copying N bytes from A to B in event loop environments (node.js, this C++, etc)

25

Thought that sucked? Try to mix in other state / logic, and then write it in C++.

26

27

28

29

Or in JavaScript...

30

Copying N bytes from A to B in Go:

    n, err := io.Copy(dst, src) // n = bytes copied; err is nil on success (a clean EOF is not an error)
31

Where to start?

32

Notable stages

33

Notable stages

34

Using Go's Standard Library

35

Using Go's Standard Library

36

Go's Standard Library

37

Hello World

package main

import (
    "fmt"
    "log"
    "net/http"
    "os"
)

// handler logs the full request (with %+v field names) to stdout,
// then answers with a greeting that echoes the requested path.
func handler(w http.ResponseWriter, r *http.Request) {
    path := r.URL.Path
    fmt.Fprintf(os.Stdout, "%s details: %+v\n", path, r)
    fmt.Fprintf(w, "Hello, world! at %s\n", path)
}

func main() {
    const addr = "127.0.0.1:8080"
    log.Printf("Running...")
    // ListenAndServe only returns on failure, so Fatal is always reached.
    log.Fatal(http.ListenAndServe(addr, http.HandlerFunc(handler)))
}
38

File Server

package main

import (
    "log"
    "net/http"
    "os"
    "path/filepath"
)

func main() {
    // Serve the tree under $HOME/go/doc; FileServer handles directory
    // listings, content types, and range requests itself.
    docRoot := filepath.Join(os.Getenv("HOME"), "go", "doc")
    log.Printf("Running...")
    log.Fatal(http.ListenAndServe("127.0.0.1:8080", http.FileServer(http.Dir(docRoot))))
}
39

http.ServeContent

40

io.Reader, io.Seeker

41

http.ServeContent

$ curl -H "Range: bytes=5-" http://localhost:8080

package main

import (
    "log"
    "net/http"
    "strings"
    "time"
)

func main() {
    log.Printf("Running...")
    // ServeContent gives Range requests, If-Modified-Since handling,
    // and content-type sniffing for free, given any io.ReadSeeker.
    serve := func(w http.ResponseWriter, r *http.Request) {
        http.ServeContent(w, r, "foo.txt", time.Now(),
            strings.NewReader("I am some content.\n"))
    }
    err := http.ListenAndServe("127.0.0.1:8080", http.HandlerFunc(serve))
    log.Fatal(err)
}
42

groupcache

43

groupcache

44

Using groupcache

Declare who you are and who your peers are.

    // me is this process's own URL as its peers will reach it.
    me := "http://10.0.0.1"
    peers := groupcache.NewHTTPPool(me)

    // Whenever peers change:
    peers.Set("http://10.0.0.1", "http://10.0.0.2", "http://10.0.0.3")

This peer interface is pluggable. (e.g. inside Google it's automatic.)

45

Using groupcache

Declare a group. (group of keys, shared between group of peers)

    // A group named "thumbnail" with a 64<<20 (64 MiB) cache limit.
    // The GetterFunc is the authoritative source for a key's value
    // (presumably invoked only on cache miss — see groupcache docs).
    var thumbNails = groupcache.NewGroup("thumbnail", 64<<20, groupcache.GetterFunc(
        func(ctx groupcache.Context, key string, dest groupcache.Sink) error {
            fileName := key
            dest.SetBytes(generateThumbnail(fileName))
            return nil
        }))
46

Using groupcache

Request keys

    var data []byte
    // Get fills data with the value for "big-file.jpg" via the sink,
    // using the group's getter/peers as needed.
    err := thumbNails.Get(ctx, "big-file.jpg",
        groupcache.AllocatingByteSliceSink(&data))
    // ...
    http.ServeContent(w, r, "big-file-thumb.jpg", modTime, bytes.NewReader(data))
47

dl.google.com and groupcache

48

dl.google.com interface composition

// A SizeReaderAt is a ReaderAt with a Size method.
//
// An io.SectionReader implements SizeReaderAt.
type SizeReaderAt interface {
    Size() int64
    io.ReaderAt
}

// NewMultiReaderAt is like io.MultiReader but produces a ReaderAt
// (and Size), instead of just a reader.
func NewMultiReaderAt(parts ...SizeReaderAt) SizeReaderAt {
    m := &multi{
        parts: make([]offsetAndSource, 0, len(parts)),
    }
    var off int64
    for _, p := range parts {
        m.parts = append(m.parts, offsetAndSource{off, p})
        off += p.Size()
    }
    m.size = off
    return m
}
49

io.SectionReader

50

chunk-aligned ReaderAt

// NewChunkAlignedReaderAt returns a ReaderAt wrapper that is backed
// by the ReaderAt r (whose total size is r.Size()) where the wrapper
// guarantees that all ReadAt calls are aligned to chunkSize boundaries
// and of size chunkSize (except for the final chunk, which may be shorter).
//
// A chunk-aligned reader is good for caching, letting upper layers have
// any access pattern, but guarantees that the wrapped ReaderAt sees
// only nicely-cacheable access patterns & sizes.
func NewChunkAlignedReaderAt(r SizeReaderAt, chunkSize int) SizeReaderAt {
    // ... (implementation elided on the slide)
}
51

Composing all this

// +build ignore,OMIT

package main

import (
	"io"
	"log"
	"net/http"
	"sort"
	"strings"
	"time"
)

// Fixed modification time so responses are cacheable/comparable.
var modTime = time.Unix(1374708739, 0)

// part wraps s in an io.SectionReader, which satisfies SizeReaderAt.
func part(s string) SizeReaderAt {
    rd := strings.NewReader(s)
    return io.NewSectionReader(rd, 0, rd.Size())
}

// handler assembles the response body from three concatenated parts
// and serves it through http.ServeContent (Range support included).
func handler(w http.ResponseWriter, r *http.Request) {
    sra := NewMultiReaderAt(
        part("Hello, "), part(" world! "),
        part("You requested "+r.URL.Path+"\n"),
    )
    // ServeContent needs an io.ReadSeeker; a SectionReader spanning the
    // whole ReaderAt provides one without copying.
    http.ServeContent(w, r, "foo.txt", modTime, io.NewSectionReader(sra, 0, sra.Size()))
}

func main() {
	log.Printf("Running...")
	// Register on the default mux, which ListenAndServe(addr, nil) uses.
	http.HandleFunc("/", handler)
	err := http.ListenAndServe("127.0.0.1:8080", nil)
	log.Fatal(err)
}

// START_1 OMIT
// A SizeReaderAt is a ReaderAt with a Size method.
//
// An io.SectionReader implements SizeReaderAt.
type SizeReaderAt interface {
	Size() int64
	io.ReaderAt
}

// NewMultiReaderAt is like io.MultiReader but produces a ReaderAt
// (and Size), instead of just a reader.
func NewMultiReaderAt(parts ...SizeReaderAt) SizeReaderAt {
	m := &multi{
		parts: make([]offsetAndSource, 0, len(parts)),
	}
	var off int64
	for _, p := range parts {
		m.parts = append(m.parts, offsetAndSource{off, p})
		off += p.Size()
	}
	m.size = off
	return m
}

// END_1 OMIT

type offsetAndSource struct {
	off int64
	SizeReaderAt
}

type multi struct {
	parts []offsetAndSource
	size  int64
}

func (m *multi) Size() int64 { return m.size }

func (m *multi) ReadAt(p []byte, off int64) (n int, err error) {
	wantN := len(p)

	// Skip past the requested offset.
	skipParts := sort.Search(len(m.parts), func(i int) bool {
		// This function returns whether parts[i] will
		// contribute any bytes to our output.
		part := m.parts[i]
		return part.off+part.Size() > off
	})
	parts := m.parts[skipParts:]

	// How far to skip in the first part.
	needSkip := off
	if len(parts) > 0 {
		needSkip -= parts[0].off
	}

	for len(parts) > 0 && len(p) > 0 {
		readP := p
		partSize := parts[0].Size()
		if int64(len(readP)) > partSize-needSkip {
			readP = readP[:partSize-needSkip]
		}
		pn, err0 := parts[0].ReadAt(readP, needSkip)
		if err0 != nil {
			return n, err0
		}
		n += pn
		p = p[pn:]
		if int64(pn)+needSkip == partSize {
			parts = parts[1:]
		}
		needSkip = 0
	}

	if n != wantN {
		err = io.ErrUnexpectedEOF
	}
	return
}
52

Things we get for free from net/http

53

Overall simplification

54

From this...

55

... to this.

56

And from pages and pages of this...

57

... to this

58

So how does it compare to C++?

59

Could we have just rewritten it in new C++?

60

Could I have just fixed the bugs in the C++ version?

61

How much of dl.google.com is closed-source?

62

Thank you

Use the left and right arrow keys or click the left and right edges of the page to navigate between slides.
(Press 'H' or navigate to hide this message.)