Diffstat (limited to 'libgo/go/http')
-rw-r--r-- | libgo/go/http/chunked.go | 56
-rw-r--r-- | libgo/go/http/client.go | 236
-rw-r--r-- | libgo/go/http/client_test.go | 40
-rw-r--r-- | libgo/go/http/dump.go | 76
-rw-r--r-- | libgo/go/http/fs.go | 265
-rw-r--r-- | libgo/go/http/fs_test.go | 172
-rw-r--r-- | libgo/go/http/lex.go | 144
-rw-r--r-- | libgo/go/http/lex_test.go | 70
-rw-r--r-- | libgo/go/http/persist.go | 303
-rw-r--r-- | libgo/go/http/pprof/pprof.go | 92
-rw-r--r-- | libgo/go/http/readrequest_test.go | 132
-rw-r--r-- | libgo/go/http/request.go | 693
-rw-r--r-- | libgo/go/http/request_test.go | 155
-rw-r--r-- | libgo/go/http/requestwrite_test.go | 139
-rw-r--r-- | libgo/go/http/response.go | 251
-rw-r--r-- | libgo/go/http/response_test.go | 203
-rw-r--r-- | libgo/go/http/responsewrite_test.go | 85
-rw-r--r-- | libgo/go/http/serve_test.go | 220
-rw-r--r-- | libgo/go/http/server.go | 766
-rw-r--r-- | libgo/go/http/status.go | 106
-rw-r--r-- | libgo/go/http/testdata/file | 1
-rw-r--r-- | libgo/go/http/transfer.go | 441
-rw-r--r-- | libgo/go/http/url.go | 595
-rw-r--r-- | libgo/go/http/url_test.go | 675
24 files changed, 5916 insertions, 0 deletions
diff --git a/libgo/go/http/chunked.go b/libgo/go/http/chunked.go
new file mode 100644
index 000000000..66195f06b
--- /dev/null
+++ b/libgo/go/http/chunked.go
@@ -0,0 +1,56 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package http
+
+import (
+	"io"
+	"os"
+	"strconv"
+)
+
+// NewChunkedWriter returns a new writer that translates writes into HTTP
+// "chunked" format before writing them to w. Closing the returned writer
+// sends the final 0-length chunk that marks the end of the stream.
+func NewChunkedWriter(w io.Writer) io.WriteCloser {
+	return &chunkedWriter{w}
+}
+
+// Writing to chunkedWriter translates to writing in HTTP chunked Transfer
+// Encoding wire format to the underlying Wire writer.
+type chunkedWriter struct {
+	Wire io.Writer
+}
+
+// Write the contents of data as one chunk to Wire.
+// NOTE: the corresponding chunk-writing procedure in Conn.Write has
+// a bug since it does not check for success of io.WriteString.
+func (cw *chunkedWriter) Write(data []byte) (n int, err os.Error) {
+
+	// Don't send 0-length data. It looks like EOF for chunked encoding.
+	if len(data) == 0 {
+		return 0, nil
+	}
+
+	head := strconv.Itob(len(data), 16) + "\r\n"
+
+	if _, err = io.WriteString(cw.Wire, head); err != nil {
+		return 0, err
+	}
+	if n, err = cw.Wire.Write(data); err != nil {
+		return
+	}
+	if n != len(data) {
+		err = io.ErrShortWrite
+		return
+	}
+	_, err = io.WriteString(cw.Wire, "\r\n")
+
+	return
+}
+
+func (cw *chunkedWriter) Close() os.Error {
+	_, err := io.WriteString(cw.Wire, "0\r\n")
+	return err
+}
diff --git a/libgo/go/http/client.go b/libgo/go/http/client.go
new file mode 100644
index 000000000..022f4f124
--- /dev/null
+++ b/libgo/go/http/client.go
@@ -0,0 +1,236 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Primitive HTTP client. See RFC 2616.
+
+package http
+
+import (
+	"bufio"
+	"bytes"
+	"crypto/tls"
+	"encoding/base64"
+	"fmt"
+	"io"
+	"net"
+	"os"
+	"strconv"
+	"strings"
+)
+
+// Given a string of the form "host", "host:port", or "[ipv6::address]:port",
+// return true if the string includes a port.
+func hasPort(s string) bool { return strings.LastIndex(s, ":") > strings.LastIndex(s, "]") }
+
+// Used in Send to implement io.ReadCloser by bundling together the
+// bufio.Reader through which we read the response, and the underlying
+// network connection.
+type readClose struct {
+	io.Reader
+	io.Closer
+}
+
+// Send issues an HTTP request. Caller should close resp.Body when done reading it.
+//
+// TODO: support persistent connections (multiple requests on a single connection).
+// send() method is nonpublic because, when we refactor the code for persistent
+// connections, it may no longer make sense to have a method with this signature.
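+//
+// For orientation, the exported helpers below (Get, Post, PostForm, Head)
+// all funnel into send. A typical call, as a sketch against this pre-Go1
+// API (the URL is illustrative):
+//
+//	r, finalURL, err := Get("http://example.com/")
+//	if err != nil {
+//		// handle error
+//	}
+//	defer r.Body.Close()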
+func send(req *Request) (resp *Response, err os.Error) { + if req.URL.Scheme != "http" && req.URL.Scheme != "https" { + return nil, &badStringError{"unsupported protocol scheme", req.URL.Scheme} + } + + addr := req.URL.Host + if !hasPort(addr) { + addr += ":" + req.URL.Scheme + } + info := req.URL.RawUserinfo + if len(info) > 0 { + enc := base64.URLEncoding + encoded := make([]byte, enc.EncodedLen(len(info))) + enc.Encode(encoded, []byte(info)) + if req.Header == nil { + req.Header = make(map[string]string) + } + req.Header["Authorization"] = "Basic " + string(encoded) + } + + var conn io.ReadWriteCloser + if req.URL.Scheme == "http" { + conn, err = net.Dial("tcp", "", addr) + if err != nil { + return nil, err + } + } else { // https + conn, err = tls.Dial("tcp", "", addr, nil) + if err != nil { + return nil, err + } + h := req.URL.Host + if hasPort(h) { + h = h[0:strings.LastIndex(h, ":")] + } + if err := conn.(*tls.Conn).VerifyHostname(h); err != nil { + return nil, err + } + } + + err = req.Write(conn) + if err != nil { + conn.Close() + return nil, err + } + + reader := bufio.NewReader(conn) + resp, err = ReadResponse(reader, req.Method) + if err != nil { + conn.Close() + return nil, err + } + + resp.Body = readClose{resp.Body, conn} + + return +} + +// True if the specified HTTP status code is one for which the Get utility should +// automatically redirect. +func shouldRedirect(statusCode int) bool { + switch statusCode { + case StatusMovedPermanently, StatusFound, StatusSeeOther, StatusTemporaryRedirect: + return true + } + return false +} + +// Get issues a GET to the specified URL. If the response is one of the following +// redirect codes, it follows the redirect, up to a maximum of 10 redirects: +// +// 301 (Moved Permanently) +// 302 (Found) +// 303 (See Other) +// 307 (Temporary Redirect) +// +// finalURL is the URL from which the response was fetched -- identical to the +// input URL unless redirects were followed. +// +// Caller should close r.Body when done reading it. +func Get(url string) (r *Response, finalURL string, err os.Error) { + // TODO: if/when we add cookie support, the redirected request shouldn't + // necessarily supply the same cookies as the original. + // TODO: set referrer header on redirects. + var base *URL + for redirect := 0; ; redirect++ { + if redirect >= 10 { + err = os.ErrorString("stopped after 10 redirects") + break + } + + var req Request + if base == nil { + req.URL, err = ParseURL(url) + } else { + req.URL, err = base.ParseURL(url) + } + if err != nil { + break + } + url = req.URL.String() + if r, err = send(&req); err != nil { + break + } + if shouldRedirect(r.StatusCode) { + r.Body.Close() + if url = r.GetHeader("Location"); url == "" { + err = os.ErrorString(fmt.Sprintf("%d response missing Location header", r.StatusCode)) + break + } + base = req.URL + continue + } + finalURL = url + return + } + + err = &URLError{"Get", url, err} + return +} + +// Post issues a POST to the specified URL. +// +// Caller should close r.Body when done reading it. 
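+//
+// A minimal use, as a sketch against this API (URL and body are illustrative;
+// note that Post sends the body with chunked transfer encoding):
+//
+//	body := strings.NewReader("name=value")
+//	r, err := Post("http://example.com/form",
+//		"application/x-www-form-urlencoded", body)
+//	if err == nil {
+//		defer r.Body.Close()
+//	}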
+func Post(url string, bodyType string, body io.Reader) (r *Response, err os.Error) { + var req Request + req.Method = "POST" + req.ProtoMajor = 1 + req.ProtoMinor = 1 + req.Close = true + req.Body = nopCloser{body} + req.Header = map[string]string{ + "Content-Type": bodyType, + } + req.TransferEncoding = []string{"chunked"} + + req.URL, err = ParseURL(url) + if err != nil { + return nil, err + } + + return send(&req) +} + +// PostForm issues a POST to the specified URL, +// with data's keys and values urlencoded as the request body. +// +// Caller should close r.Body when done reading it. +func PostForm(url string, data map[string]string) (r *Response, err os.Error) { + var req Request + req.Method = "POST" + req.ProtoMajor = 1 + req.ProtoMinor = 1 + req.Close = true + body := urlencode(data) + req.Body = nopCloser{body} + req.Header = map[string]string{ + "Content-Type": "application/x-www-form-urlencoded", + "Content-Length": strconv.Itoa(body.Len()), + } + req.ContentLength = int64(body.Len()) + + req.URL, err = ParseURL(url) + if err != nil { + return nil, err + } + + return send(&req) +} + +// TODO: remove this function when PostForm takes a multimap. +func urlencode(data map[string]string) (b *bytes.Buffer) { + m := make(map[string][]string, len(data)) + for k, v := range data { + m[k] = []string{v} + } + return bytes.NewBuffer([]byte(EncodeQuery(m))) +} + +// Head issues a HEAD to the specified URL. +func Head(url string) (r *Response, err os.Error) { + var req Request + req.Method = "HEAD" + if req.URL, err = ParseURL(url); err != nil { + return + } + url = req.URL.String() + if r, err = send(&req); err != nil { + return + } + return +} + +type nopCloser struct { + io.Reader +} + +func (nopCloser) Close() os.Error { return nil } diff --git a/libgo/go/http/client_test.go b/libgo/go/http/client_test.go new file mode 100644 index 000000000..013653a82 --- /dev/null +++ b/libgo/go/http/client_test.go @@ -0,0 +1,40 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Tests for client.go + +package http + +import ( + "io/ioutil" + "strings" + "testing" +) + +func TestClient(t *testing.T) { + // TODO: add a proper test suite. Current test merely verifies that + // we can retrieve the Google robots.txt file. + + r, _, err := Get("http://www.google.com/robots.txt") + var b []byte + if err == nil { + b, err = ioutil.ReadAll(r.Body) + r.Body.Close() + } + if err != nil { + t.Error(err) + } else if s := string(b); !strings.HasPrefix(s, "User-agent:") { + t.Errorf("Incorrect page body (did not begin with User-agent): %q", s) + } +} + +func TestClientHead(t *testing.T) { + r, err := Head("http://www.google.com/robots.txt") + if err != nil { + t.Fatal(err) + } + if _, ok := r.Header["Last-Modified"]; !ok { + t.Error("Last-Modified header not found.") + } +} diff --git a/libgo/go/http/dump.go b/libgo/go/http/dump.go new file mode 100644 index 000000000..73ac97973 --- /dev/null +++ b/libgo/go/http/dump.go @@ -0,0 +1,76 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package http + +import ( + "bytes" + "io" + "os" +) + + +// One of the copies, say from b to r2, could be avoided by using a more +// elaborate trick where the other copy is made during Request/Response.Write. 
+// This would complicate things too much, given that these functions are for +// debugging only. +func drainBody(b io.ReadCloser) (r1, r2 io.ReadCloser, err os.Error) { + var buf bytes.Buffer + if _, err = buf.ReadFrom(b); err != nil { + return nil, nil, err + } + if err = b.Close(); err != nil { + return nil, nil, err + } + return nopCloser{&buf}, nopCloser{bytes.NewBuffer(buf.Bytes())}, nil +} + +// DumpRequest returns the wire representation of req, +// optionally including the request body, for debugging. +// DumpRequest is semantically a no-op, but in order to +// dump the body, it reads the body data into memory and +// changes req.Body to refer to the in-memory copy. +func DumpRequest(req *Request, body bool) (dump []byte, err os.Error) { + var b bytes.Buffer + save := req.Body + if !body || req.Body == nil { + req.Body = nil + } else { + save, req.Body, err = drainBody(req.Body) + if err != nil { + return + } + } + err = req.Write(&b) + req.Body = save + if err != nil { + return + } + dump = b.Bytes() + return +} + +// DumpResponse is like DumpRequest but dumps a response. +func DumpResponse(resp *Response, body bool) (dump []byte, err os.Error) { + var b bytes.Buffer + save := resp.Body + savecl := resp.ContentLength + if !body || resp.Body == nil { + resp.Body = nil + resp.ContentLength = 0 + } else { + save, resp.Body, err = drainBody(resp.Body) + if err != nil { + return + } + } + err = resp.Write(&b) + resp.Body = save + resp.ContentLength = savecl + if err != nil { + return + } + dump = b.Bytes() + return +} diff --git a/libgo/go/http/fs.go b/libgo/go/http/fs.go new file mode 100644 index 000000000..bbfa58d26 --- /dev/null +++ b/libgo/go/http/fs.go @@ -0,0 +1,265 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// HTTP file system request handler + +package http + +import ( + "fmt" + "io" + "mime" + "os" + "path" + "strconv" + "strings" + "time" + "utf8" +) + +// Heuristic: b is text if it is valid UTF-8 and doesn't +// contain any unprintable ASCII or Unicode characters. +func isText(b []byte) bool { + for len(b) > 0 && utf8.FullRune(b) { + rune, size := utf8.DecodeRune(b) + if size == 1 && rune == utf8.RuneError { + // decoding error + return false + } + if 0x7F <= rune && rune <= 0x9F { + return false + } + if rune < ' ' { + switch rune { + case '\n', '\r', '\t': + // okay + default: + // binary garbage + return false + } + } + b = b[size:] + } + return true +} + +func dirList(w ResponseWriter, f *os.File) { + fmt.Fprintf(w, "<pre>\n") + for { + dirs, err := f.Readdir(100) + if err != nil || len(dirs) == 0 { + break + } + for _, d := range dirs { + name := d.Name + if d.IsDirectory() { + name += "/" + } + // TODO htmlescape + fmt.Fprintf(w, "<a href=\"%s\">%s</a>\n", name, name) + } + } + fmt.Fprintf(w, "</pre>\n") +} + +func serveFile(w ResponseWriter, r *Request, name string, redirect bool) { + const indexPage = "/index.html" + + // redirect .../index.html to .../ + if strings.HasSuffix(r.URL.Path, indexPage) { + Redirect(w, r, r.URL.Path[0:len(r.URL.Path)-len(indexPage)+1], StatusMovedPermanently) + return + } + + f, err := os.Open(name, os.O_RDONLY, 0) + if err != nil { + // TODO expose actual error? + NotFound(w, r) + return + } + defer f.Close() + + d, err1 := f.Stat() + if err1 != nil { + // TODO expose actual error? 
+		NotFound(w, r)
+		return
+	}
+
+	if redirect {
+		// redirect to canonical path: / at end of directory url
+		// r.URL.Path always begins with /
+		url := r.URL.Path
+		if d.IsDirectory() {
+			if url[len(url)-1] != '/' {
+				Redirect(w, r, url+"/", StatusMovedPermanently)
+				return
+			}
+		} else {
+			if url[len(url)-1] == '/' {
+				Redirect(w, r, url[0:len(url)-1], StatusMovedPermanently)
+				return
+			}
+		}
+	}
+
+	if t, _ := time.Parse(TimeFormat, r.Header["If-Modified-Since"]); t != nil && d.Mtime_ns/1e9 <= t.Seconds() {
+		w.WriteHeader(StatusNotModified)
+		return
+	}
+	w.SetHeader("Last-Modified", time.SecondsToUTC(d.Mtime_ns/1e9).Format(TimeFormat))
+
+	// use contents of index.html for directory, if present
+	if d.IsDirectory() {
+		index := name + indexPage
+		ff, err := os.Open(index, os.O_RDONLY, 0)
+		if err == nil {
+			defer ff.Close()
+			dd, err := ff.Stat()
+			if err == nil {
+				name = index
+				d = dd
+				f = ff
+			}
+		}
+	}
+
+	if d.IsDirectory() {
+		dirList(w, f)
+		return
+	}
+
+	// serve file
+	size := d.Size
+	code := StatusOK
+
+	// use extension to find content type.
+	ext := path.Ext(name)
+	if ctype := mime.TypeByExtension(ext); ctype != "" {
+		w.SetHeader("Content-Type", ctype)
+	} else {
+		// read first chunk to decide between utf-8 text and binary
+		var buf [1024]byte
+		n, _ := io.ReadFull(f, buf[:])
+		b := buf[:n]
+		if isText(b) {
+			w.SetHeader("Content-Type", "text/plain; charset=utf-8")
+		} else {
+			w.SetHeader("Content-Type", "application/octet-stream") // generic binary
+		}
+		f.Seek(0, 0) // rewind to output whole file
+	}
+
+	// handle Content-Range header.
+	// TODO(adg): handle multiple ranges
+	ranges, err := parseRange(r.Header["Range"], size)
+	if err != nil || len(ranges) > 1 {
+		Error(w, err.String(), StatusRequestedRangeNotSatisfiable)
+		return
+	}
+	if len(ranges) == 1 {
+		ra := ranges[0]
+		if _, err := f.Seek(ra.start, 0); err != nil {
+			Error(w, err.String(), StatusRequestedRangeNotSatisfiable)
+			return
+		}
+		size = ra.length
+		code = StatusPartialContent
+		w.SetHeader("Content-Range", fmt.Sprintf("bytes %d-%d/%d", ra.start, ra.start+ra.length-1, d.Size))
+	}
+
+	w.SetHeader("Accept-Ranges", "bytes")
+	w.SetHeader("Content-Length", strconv.Itoa64(size))
+
+	w.WriteHeader(code)
+
+	if r.Method != "HEAD" {
+		io.Copyn(w, f, size)
+	}
+}
+
+// ServeFile replies to the request with the contents of the named file or directory.
+func ServeFile(w ResponseWriter, r *Request, name string) {
+	serveFile(w, r, name, false)
+}
+
+type fileHandler struct {
+	root   string
+	prefix string
+}
+
+// FileServer returns a handler that serves HTTP requests
+// with the contents of the file system rooted at root.
+// It strips prefix from the incoming requests before
+// looking up the file name in the file system.
+func FileServer(root, prefix string) Handler { return &fileHandler{root, prefix} }
+
+func (f *fileHandler) ServeHTTP(w ResponseWriter, r *Request) {
+	path := r.URL.Path
+	if !strings.HasPrefix(path, f.prefix) {
+		NotFound(w, r)
+		return
+	}
+	path = path[len(f.prefix):]
+	serveFile(w, r, f.root+"/"+path, true)
+}
+
+// httpRange specifies the byte range to be sent to the client.
+type httpRange struct {
+	start, length int64
+}
+
+// parseRange parses a Range header string as per RFC 2616.
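+//
+// For intuition, with size = 10: "bytes=0-4" yields {start: 0, length: 5};
+// the suffix form "bytes=-3" yields {start: 7, length: 3}; and the
+// open-ended "bytes=5-" yields {start: 5, length: 5}. The cases in
+// fs_test.go below exercise exactly these shapes.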
+func parseRange(s string, size int64) ([]httpRange, os.Error) { + if s == "" { + return nil, nil // header not present + } + const b = "bytes=" + if !strings.HasPrefix(s, b) { + return nil, os.NewError("invalid range") + } + var ranges []httpRange + for _, ra := range strings.Split(s[len(b):], ",", -1) { + i := strings.Index(ra, "-") + if i < 0 { + return nil, os.NewError("invalid range") + } + start, end := ra[:i], ra[i+1:] + var r httpRange + if start == "" { + // If no start is specified, end specifies the + // range start relative to the end of the file. + i, err := strconv.Atoi64(end) + if err != nil { + return nil, os.NewError("invalid range") + } + if i > size { + i = size + } + r.start = size - i + r.length = size - r.start + } else { + i, err := strconv.Atoi64(start) + if err != nil || i > size || i < 0 { + return nil, os.NewError("invalid range") + } + r.start = i + if end == "" { + // If no end is specified, range extends to end of the file. + r.length = size - r.start + } else { + i, err := strconv.Atoi64(end) + if err != nil || r.start > i { + return nil, os.NewError("invalid range") + } + if i >= size { + i = size - 1 + } + r.length = i - r.start + 1 + } + } + ranges = append(ranges, r) + } + return ranges, nil +} diff --git a/libgo/go/http/fs_test.go b/libgo/go/http/fs_test.go new file mode 100644 index 000000000..0a5636b88 --- /dev/null +++ b/libgo/go/http/fs_test.go @@ -0,0 +1,172 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package http + +import ( + "fmt" + "io/ioutil" + "net" + "os" + "sync" + "testing" +) + +var ParseRangeTests = []struct { + s string + length int64 + r []httpRange +}{ + {"", 0, nil}, + {"foo", 0, nil}, + {"bytes=", 0, nil}, + {"bytes=5-4", 10, nil}, + {"bytes=0-2,5-4", 10, nil}, + {"bytes=0-9", 10, []httpRange{{0, 10}}}, + {"bytes=0-", 10, []httpRange{{0, 10}}}, + {"bytes=5-", 10, []httpRange{{5, 5}}}, + {"bytes=0-20", 10, []httpRange{{0, 10}}}, + {"bytes=15-,0-5", 10, nil}, + {"bytes=-5", 10, []httpRange{{5, 5}}}, + {"bytes=-15", 10, []httpRange{{0, 10}}}, + {"bytes=0-499", 10000, []httpRange{{0, 500}}}, + {"bytes=500-999", 10000, []httpRange{{500, 500}}}, + {"bytes=-500", 10000, []httpRange{{9500, 500}}}, + {"bytes=9500-", 10000, []httpRange{{9500, 500}}}, + {"bytes=0-0,-1", 10000, []httpRange{{0, 1}, {9999, 1}}}, + {"bytes=500-600,601-999", 10000, []httpRange{{500, 101}, {601, 399}}}, + {"bytes=500-700,601-999", 10000, []httpRange{{500, 201}, {601, 399}}}, +} + +func TestParseRange(t *testing.T) { + for _, test := range ParseRangeTests { + r := test.r + ranges, err := parseRange(test.s, test.length) + if err != nil && r != nil { + t.Errorf("parseRange(%q) returned error %q", test.s, err) + } + if len(ranges) != len(r) { + t.Errorf("len(parseRange(%q)) = %d, want %d", test.s, len(ranges), len(r)) + continue + } + for i := range r { + if ranges[i].start != r[i].start { + t.Errorf("parseRange(%q)[%d].start = %d, want %d", test.s, i, ranges[i].start, r[i].start) + } + if ranges[i].length != r[i].length { + t.Errorf("parseRange(%q)[%d].length = %d, want %d", test.s, i, ranges[i].length, r[i].length) + } + } + } +} + +const ( + testFile = "testdata/file" + testFileLength = 11 +) + +var ( + serverOnce sync.Once + serverAddr string +) + +func startServer(t *testing.T) { + serverOnce.Do(func() { + HandleFunc("/ServeFile", func(w ResponseWriter, r *Request) { + ServeFile(w, r, "testdata/file") + }) + l, err := net.Listen("tcp", 
"127.0.0.1:0") + if err != nil { + t.Fatal("listen:", err) + } + serverAddr = l.Addr().String() + go Serve(l, nil) + }) +} + +var ServeFileRangeTests = []struct { + start, end int + r string + code int +}{ + {0, testFileLength, "", StatusOK}, + {0, 5, "0-4", StatusPartialContent}, + {2, testFileLength, "2-", StatusPartialContent}, + {testFileLength - 5, testFileLength, "-5", StatusPartialContent}, + {3, 8, "3-7", StatusPartialContent}, + {0, 0, "20-", StatusRequestedRangeNotSatisfiable}, +} + +func TestServeFile(t *testing.T) { + startServer(t) + var err os.Error + + file, err := ioutil.ReadFile(testFile) + if err != nil { + t.Fatal("reading file:", err) + } + + // set up the Request (re-used for all tests) + var req Request + req.Header = make(map[string]string) + if req.URL, err = ParseURL("http://" + serverAddr + "/ServeFile"); err != nil { + t.Fatal("ParseURL:", err) + } + req.Method = "GET" + + // straight GET + _, body := getBody(t, req) + if !equal(body, file) { + t.Fatalf("body mismatch: got %q, want %q", body, file) + } + + // Range tests + for _, rt := range ServeFileRangeTests { + req.Header["Range"] = "bytes=" + rt.r + if rt.r == "" { + req.Header["Range"] = "" + } + r, body := getBody(t, req) + if r.StatusCode != rt.code { + t.Errorf("range=%q: StatusCode=%d, want %d", rt.r, r.StatusCode, rt.code) + } + if rt.code == StatusRequestedRangeNotSatisfiable { + continue + } + h := fmt.Sprintf("bytes %d-%d/%d", rt.start, rt.end-1, testFileLength) + if rt.r == "" { + h = "" + } + if r.Header["Content-Range"] != h { + t.Errorf("header mismatch: range=%q: got %q, want %q", rt.r, r.Header["Content-Range"], h) + } + if !equal(body, file[rt.start:rt.end]) { + t.Errorf("body mismatch: range=%q: got %q, want %q", rt.r, body, file[rt.start:rt.end]) + } + } +} + +func getBody(t *testing.T, req Request) (*Response, []byte) { + r, err := send(&req) + if err != nil { + t.Fatal(req.URL.String(), "send:", err) + } + b, err := ioutil.ReadAll(r.Body) + if err != nil { + t.Fatal("reading Body:", err) + } + return r, b +} + +func equal(a, b []byte) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} diff --git a/libgo/go/http/lex.go b/libgo/go/http/lex.go new file mode 100644 index 000000000..93b67e701 --- /dev/null +++ b/libgo/go/http/lex.go @@ -0,0 +1,144 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package http + +// This file deals with lexical matters of HTTP + +func isSeparator(c byte) bool { + switch c { + case '(', ')', '<', '>', '@', ',', ';', ':', '\\', '"', '/', '[', ']', '?', '=', '{', '}', ' ', '\t': + return true + } + return false +} + +func isSpace(c byte) bool { + switch c { + case ' ', '\t', '\r', '\n': + return true + } + return false +} + +func isCtl(c byte) bool { return (0 <= c && c <= 31) || c == 127 } + +func isChar(c byte) bool { return 0 <= c && c <= 127 } + +func isAnyText(c byte) bool { return !isCtl(c) } + +func isQdText(c byte) bool { return isAnyText(c) && c != '"' } + +func isToken(c byte) bool { return isChar(c) && !isCtl(c) && !isSeparator(c) } + +// Valid escaped sequences are not specified in RFC 2616, so for now, we assume +// that they coincide with the common sense ones used by GO. Malformed +// characters should probably not be treated as errors by a robust (forgiving) +// parser, so we replace them with the '?' character. 
+func httpUnquotePair(b byte) byte { + // skip the first byte, which should always be '\' + switch b { + case 'a': + return '\a' + case 'b': + return '\b' + case 'f': + return '\f' + case 'n': + return '\n' + case 'r': + return '\r' + case 't': + return '\t' + case 'v': + return '\v' + case '\\': + return '\\' + case '\'': + return '\'' + case '"': + return '"' + } + return '?' +} + +// raw must begin with a valid quoted string. Only the first quoted string is +// parsed and is unquoted in result. eaten is the number of bytes parsed, or -1 +// upon failure. +func httpUnquote(raw []byte) (eaten int, result string) { + buf := make([]byte, len(raw)) + if raw[0] != '"' { + return -1, "" + } + eaten = 1 + j := 0 // # of bytes written in buf + for i := 1; i < len(raw); i++ { + switch b := raw[i]; b { + case '"': + eaten++ + buf = buf[0:j] + return i + 1, string(buf) + case '\\': + if len(raw) < i+2 { + return -1, "" + } + buf[j] = httpUnquotePair(raw[i+1]) + eaten += 2 + j++ + i++ + default: + if isQdText(b) { + buf[j] = b + } else { + buf[j] = '?' + } + eaten++ + j++ + } + } + return -1, "" +} + +// This is a best effort parse, so errors are not returned, instead not all of +// the input string might be parsed. result is always non-nil. +func httpSplitFieldValue(fv string) (eaten int, result []string) { + result = make([]string, 0, len(fv)) + raw := []byte(fv) + i := 0 + chunk := "" + for i < len(raw) { + b := raw[i] + switch { + case b == '"': + eaten, unq := httpUnquote(raw[i:len(raw)]) + if eaten < 0 { + return i, result + } else { + i += eaten + chunk += unq + } + case isSeparator(b): + if chunk != "" { + result = result[0 : len(result)+1] + result[len(result)-1] = chunk + chunk = "" + } + i++ + case isToken(b): + chunk += string(b) + i++ + case b == '\n' || b == '\r': + i++ + default: + chunk += "?" + i++ + } + } + if chunk != "" { + result = result[0 : len(result)+1] + result[len(result)-1] = chunk + chunk = "" + } + return i, result +} diff --git a/libgo/go/http/lex_test.go b/libgo/go/http/lex_test.go new file mode 100644 index 000000000..5386f7534 --- /dev/null +++ b/libgo/go/http/lex_test.go @@ -0,0 +1,70 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package http + +import ( + "testing" +) + +type lexTest struct { + Raw string + Parsed int // # of parsed characters + Result []string +} + +var lexTests = []lexTest{ + { + Raw: `"abc"def,:ghi`, + Parsed: 13, + Result: []string{"abcdef", "ghi"}, + }, + // My understanding of the RFC is that escape sequences outside of + // quotes are not interpreted? + { + Raw: `"\t"\t"\t"`, + Parsed: 10, + Result: []string{"\t", "t\t"}, + }, + { + Raw: `"\yab"\r\n`, + Parsed: 10, + Result: []string{"?ab", "r", "n"}, + }, + { + Raw: "ab\f", + Parsed: 3, + Result: []string{"ab?"}, + }, + { + Raw: "\"ab \" c,de f, gh, ij\n\t\r", + Parsed: 23, + Result: []string{"ab ", "c", "de", "f", "gh", "ij"}, + }, +} + +func min(x, y int) int { + if x <= y { + return x + } + return y +} + +func TestSplitFieldValue(t *testing.T) { + for k, l := range lexTests { + parsed, result := httpSplitFieldValue(l.Raw) + if parsed != l.Parsed { + t.Errorf("#%d: Parsed %d, expected %d", k, parsed, l.Parsed) + } + if len(result) != len(l.Result) { + t.Errorf("#%d: Result len %d, expected %d", k, len(result), len(l.Result)) + } + for i := 0; i < min(len(result), len(l.Result)); i++ { + if result[i] != l.Result[i] { + t.Errorf("#%d: %d-th entry mismatch. 
Have {%s}, expect {%s}",
+				k, i, result[i], l.Result[i])
+			}
+		}
+	}
+}
diff --git a/libgo/go/http/persist.go b/libgo/go/http/persist.go
new file mode 100644
index 000000000..8bfc09755
--- /dev/null
+++ b/libgo/go/http/persist.go
@@ -0,0 +1,303 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package http
+
+import (
+	"bufio"
+	"container/list"
+	"io"
+	"net"
+	"os"
+	"sync"
+)
+
+var ErrPersistEOF = &ProtocolError{"persistent connection closed"}
+
+// A ServerConn reads requests and sends responses over an underlying
+// connection, until the HTTP keepalive logic commands an end. ServerConn
+// does not close the underlying connection. Instead, the user calls Close
+// and regains control over the connection. ServerConn supports pipelining,
+// i.e. requests can be read out of sync (but in the same order) while the
+// respective responses are sent.
+type ServerConn struct {
+	c               net.Conn
+	r               *bufio.Reader
+	clsd            bool     // indicates a graceful close
+	re, we          os.Error // read/write errors
+	lastBody        io.ReadCloser
+	nread, nwritten int
+	lk              sync.Mutex // protects read/write to re, we
+}
+
+// NewServerConn returns a new ServerConn reading and writing c. If r is not
+// nil, it is the buffer to use when reading c.
+func NewServerConn(c net.Conn, r *bufio.Reader) *ServerConn {
+	if r == nil {
+		r = bufio.NewReader(c)
+	}
+	return &ServerConn{c: c, r: r}
+}
+
+// Close detaches the ServerConn and returns the underlying connection as well
+// as the read-side bufio which may have some left over data. Close may be
+// called before Read has signaled the end of the keep-alive logic. The user
+// should not call Close while Read or Write is in progress.
+func (sc *ServerConn) Close() (c net.Conn, r *bufio.Reader) {
+	sc.lk.Lock()
+	defer sc.lk.Unlock()
+	c = sc.c
+	r = sc.r
+	sc.c = nil
+	sc.r = nil
+	return
+}
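+
+// A typical serving loop, as a sketch (conn is an accepted net.Conn;
+// response construction and error details elided):
+//
+//	sc := NewServerConn(conn, nil)
+//	for {
+//		req, err := sc.Read()
+//		if err != nil {
+//			break // ErrPersistEOF signals a graceful end
+//		}
+//		// ... build resp from req ...
+//		sc.Write(resp)
+//	}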
+
+// Read returns the next request on the wire. An ErrPersistEOF is returned if
+// it is gracefully determined that there are no more requests (e.g. after the
+// first request on an HTTP/1.0 connection, or after a Connection: close on an
+// HTTP/1.1 connection). Read can be called concurrently with Write, but not
+// with another Read.
+func (sc *ServerConn) Read() (req *Request, err os.Error) {
+
+	sc.lk.Lock()
+	if sc.we != nil { // no point receiving if write-side broken or closed
+		defer sc.lk.Unlock()
+		return nil, sc.we
+	}
+	if sc.re != nil {
+		defer sc.lk.Unlock()
+		return nil, sc.re
+	}
+	sc.lk.Unlock()
+
+	// Make sure body is fully consumed, even if user does not call body.Close
+	if sc.lastBody != nil {
+		// body.Close is assumed to be idempotent and multiple calls to
+		// it should return the error that its first invocation
+		// returned.
+		err = sc.lastBody.Close()
+		sc.lastBody = nil
+		if err != nil {
+			sc.lk.Lock()
+			defer sc.lk.Unlock()
+			sc.re = err
+			return nil, err
+		}
+	}
+
+	req, err = ReadRequest(sc.r)
+	if err != nil {
+		sc.lk.Lock()
+		defer sc.lk.Unlock()
+		if err == io.ErrUnexpectedEOF {
+			// A close from the opposing client is treated as a
+			// graceful close, even if there was some unparseable
+			// data before the close.
+			sc.re = ErrPersistEOF
+			return nil, sc.re
+		} else {
+			sc.re = err
+			return
+		}
+	}
+	sc.lastBody = req.Body
+	sc.nread++
+	if req.Close {
+		sc.lk.Lock()
+		defer sc.lk.Unlock()
+		sc.re = ErrPersistEOF
+		return req, sc.re
+	}
+	return
+}
+
+// Pending returns the number of unanswered requests
+// that have been received on the connection.
+func (sc *ServerConn) Pending() int {
+	sc.lk.Lock()
+	defer sc.lk.Unlock()
+	return sc.nread - sc.nwritten
+}
+
+// Write writes a response. To close the connection gracefully, set the
+// Response.Close field to true. Write should be considered operational until
+// it returns an error, regardless of any errors returned on the Read side.
+// Write can be called concurrently with Read, but not with another Write.
+func (sc *ServerConn) Write(resp *Response) os.Error {
+
+	sc.lk.Lock()
+	if sc.we != nil {
+		defer sc.lk.Unlock()
+		return sc.we
+	}
+	sc.lk.Unlock()
+	if sc.nread <= sc.nwritten {
+		return os.NewError("persist server pipe count")
+	}
+
+	if resp.Close {
+		// After signaling a keep-alive close, any pipelined unread
+		// requests will be lost. It is up to the user to drain them
+		// before signaling.
+		sc.lk.Lock()
+		sc.re = ErrPersistEOF
+		sc.lk.Unlock()
+	}
+
+	err := resp.Write(sc.c)
+	if err != nil {
+		sc.lk.Lock()
+		defer sc.lk.Unlock()
+		sc.we = err
+		return err
+	}
+	sc.nwritten++
+
+	return nil
+}
+
+// A ClientConn sends requests and receives headers over an underlying
+// connection, while respecting the HTTP keepalive logic. ClientConn is not
+// responsible for closing the underlying connection. One must call Close to
+// regain control of that connection and deal with it as desired.
+type ClientConn struct {
+	c               net.Conn
+	r               *bufio.Reader
+	re, we          os.Error // read/write errors
+	lastBody        io.ReadCloser
+	nread, nwritten int
+	reqm            list.List  // request methods in order of execution
+	lk              sync.Mutex // protects read/write to reqm, re, we
+}
+
+// NewClientConn returns a new ClientConn reading and writing c. If r is not
+// nil, it is the buffer to use when reading c.
+func NewClientConn(c net.Conn, r *bufio.Reader) *ClientConn {
+	if r == nil {
+		r = bufio.NewReader(c)
+	}
+	return &ClientConn{c: c, r: r}
+}
+
+// Close detaches the ClientConn and returns the underlying connection as well
+// as the read-side bufio which may have some left over data. Close may be
+// called before the user or Read have signaled the end of the keep-alive
+// logic. The user should not call Close while Read or Write is in progress.
+func (cc *ClientConn) Close() (c net.Conn, r *bufio.Reader) {
+	cc.lk.Lock()
+	c = cc.c
+	r = cc.r
+	cc.c = nil
+	cc.r = nil
+	cc.reqm.Init()
+	cc.lk.Unlock()
+	return
+}
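+
+// A pipelined exchange, as a sketch (conn is an established net.Conn;
+// req1 and req2 are previously built *Request values; errors elided):
+//
+//	cc := NewClientConn(conn, nil)
+//	cc.Write(req1) // queue two requests back to back
+//	cc.Write(req2)
+//	r1, _ := cc.Read() // responses come back in request order
+//	r2, _ := cc.Read()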
+
+// Write writes a request. An ErrPersistEOF error is returned if the connection
+// has been closed in an HTTP keepalive sense. If req.Close is true, the
+// keepalive connection is logically closed after this request and the opposing
+// server is informed. An ErrUnexpectedEOF indicates the remote closed the
+// underlying TCP connection, which is usually considered a graceful close.
+// Write can be called concurrently with Read, but not with another Write.
+func (cc *ClientConn) Write(req *Request) os.Error {
+
+	cc.lk.Lock()
+	if cc.re != nil { // no point sending if read-side closed or broken
+		defer cc.lk.Unlock()
+		return cc.re
+	}
+	if cc.we != nil {
+		defer cc.lk.Unlock()
+		return cc.we
+	}
+	cc.lk.Unlock()
+
+	if req.Close {
+		// We write the EOF to the write-side error, because there
+		// still might be some pipelined reads
+		cc.lk.Lock()
+		cc.we = ErrPersistEOF
+		cc.lk.Unlock()
+	}
+
+	err := req.Write(cc.c)
+	if err != nil {
+		cc.lk.Lock()
+		defer cc.lk.Unlock()
+		cc.we = err
+		return err
+	}
+	cc.nwritten++
+	cc.lk.Lock()
+	cc.reqm.PushBack(req.Method)
+	cc.lk.Unlock()
+
+	return nil
+}
+
+// Pending returns the number of unanswered requests
+// that have been sent on the connection.
+func (cc *ClientConn) Pending() int {
+	cc.lk.Lock()
+	defer cc.lk.Unlock()
+	return cc.nwritten - cc.nread
+}
+
+// Read reads the next response from the wire. A valid response might be
+// returned together with an ErrPersistEOF, which means that the remote
+// requested that this be the last request serviced. Read can be called
+// concurrently with Write, but not with another Read.
+func (cc *ClientConn) Read() (resp *Response, err os.Error) {
+
+	cc.lk.Lock()
+	if cc.re != nil {
+		defer cc.lk.Unlock()
+		return nil, cc.re
+	}
+	cc.lk.Unlock()
+
+	if cc.nread >= cc.nwritten {
+		return nil, os.NewError("persist client pipe count")
+	}
+
+	// Make sure body is fully consumed, even if user does not call body.Close
+	if cc.lastBody != nil {
+		// body.Close is assumed to be idempotent and multiple calls to
+		// it should return the error that its first invocation
+		// returned.
+		err = cc.lastBody.Close()
+		cc.lastBody = nil
+		if err != nil {
+			cc.lk.Lock()
+			defer cc.lk.Unlock()
+			cc.re = err
+			return nil, err
+		}
+	}
+
+	cc.lk.Lock()
+	m := cc.reqm.Front()
+	cc.reqm.Remove(m)
+	cc.lk.Unlock()
+	resp, err = ReadResponse(cc.r, m.Value.(string))
+	if err != nil {
+		cc.lk.Lock()
+		defer cc.lk.Unlock()
+		cc.re = err
+		return
+	}
+	cc.lastBody = resp.Body
+
+	cc.nread++
+
+	if resp.Close {
+		cc.lk.Lock()
+		defer cc.lk.Unlock()
+		cc.re = ErrPersistEOF // don't send any more requests
+		return resp, cc.re
+	}
+	return
+}
diff --git a/libgo/go/http/pprof/pprof.go b/libgo/go/http/pprof/pprof.go
new file mode 100644
index 000000000..f7db9aab9
--- /dev/null
+++ b/libgo/go/http/pprof/pprof.go
@@ -0,0 +1,92 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package pprof serves via its HTTP server runtime profiling data
+// in the format expected by the pprof visualization tool.
+// For more information about pprof, see
+// http://code.google.com/p/google-perftools/.
+//
+// The package is typically only imported for the side effect of
+// registering its HTTP handlers.
+// The handled paths all begin with /debug/pprof/.
+//
+// To use pprof, link this package into your program:
+//	import _ "http/pprof"
+//
+// Then use the pprof tool to look at the heap profile:
+//
+//	pprof http://localhost:6060/debug/pprof/heap
+//
+package pprof

+import (
+	"bufio"
+	"fmt"
+	"http"
+	"os"
+	"runtime"
+	"runtime/pprof"
+	"strconv"
+	"strings"
+)
+
+func init() {
+	http.Handle("/debug/pprof/cmdline", http.HandlerFunc(Cmdline))
+	http.Handle("/debug/pprof/heap", http.HandlerFunc(Heap))
+	http.Handle("/debug/pprof/symbol", http.HandlerFunc(Symbol))
+}
+
+// Cmdline responds with the running program's
+// command line, with arguments separated by NUL bytes.
+// The package initialization registers it as /debug/pprof/cmdline. +func Cmdline(w http.ResponseWriter, r *http.Request) { + w.SetHeader("content-type", "text/plain; charset=utf-8") + fmt.Fprintf(w, strings.Join(os.Args, "\x00")) +} + +// Heap responds with the pprof-formatted heap profile. +// The package initialization registers it as /debug/pprof/heap. +func Heap(w http.ResponseWriter, r *http.Request) { + w.SetHeader("content-type", "text/plain; charset=utf-8") + pprof.WriteHeapProfile(w) +} + +// Symbol looks up the program counters listed in the request, +// responding with a table mapping program counters to function names. +// The package initialization registers it as /debug/pprof/symbol. +func Symbol(w http.ResponseWriter, r *http.Request) { + w.SetHeader("content-type", "text/plain; charset=utf-8") + + // We don't know how many symbols we have, but we + // do have symbol information. Pprof only cares whether + // this number is 0 (no symbols available) or > 0. + fmt.Fprintf(w, "num_symbols: 1\n") + + var b *bufio.Reader + if r.Method == "POST" { + b = bufio.NewReader(r.Body) + } else { + b = bufio.NewReader(strings.NewReader(r.URL.RawQuery)) + } + + for { + word, err := b.ReadSlice('+') + if err == nil { + word = word[0 : len(word)-1] // trim + + } + pc, _ := strconv.Btoui64(string(word), 0) + if pc != 0 { + f := runtime.FuncForPC(uintptr(pc)) + if f != nil { + fmt.Fprintf(w, "%#x %s\n", pc, f.Name()) + } + } + + // Wait until here to check for err; the last + // symbol will have an err because it doesn't end in +. + if err != nil { + break + } + } +} diff --git a/libgo/go/http/readrequest_test.go b/libgo/go/http/readrequest_test.go new file mode 100644 index 000000000..5e1cbcbcb --- /dev/null +++ b/libgo/go/http/readrequest_test.go @@ -0,0 +1,132 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package http + +import ( + "bufio" + "bytes" + "fmt" + "io" + "testing" +) + +type reqTest struct { + Raw string + Req Request + Body string +} + +var reqTests = []reqTest{ + // Baseline test; All Request fields included for template use + { + "GET http://www.techcrunch.com/ HTTP/1.1\r\n" + + "Host: www.techcrunch.com\r\n" + + "User-Agent: Fake\r\n" + + "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n" + + "Accept-Language: en-us,en;q=0.5\r\n" + + "Accept-Encoding: gzip,deflate\r\n" + + "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\r\n" + + "Keep-Alive: 300\r\n" + + "Content-Length: 7\r\n" + + "Proxy-Connection: keep-alive\r\n\r\n" + + "abcdef\n???", + + Request{ + Method: "GET", + RawURL: "http://www.techcrunch.com/", + URL: &URL{ + Raw: "http://www.techcrunch.com/", + Scheme: "http", + RawPath: "/", + RawAuthority: "www.techcrunch.com", + RawUserinfo: "", + Host: "www.techcrunch.com", + Path: "/", + RawQuery: "", + Fragment: "", + }, + Proto: "HTTP/1.1", + ProtoMajor: 1, + ProtoMinor: 1, + Header: map[string]string{ + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Language": "en-us,en;q=0.5", + "Accept-Encoding": "gzip,deflate", + "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.7", + "Keep-Alive": "300", + "Proxy-Connection": "keep-alive", + "Content-Length": "7", + }, + Close: false, + ContentLength: 7, + Host: "www.techcrunch.com", + Referer: "", + UserAgent: "Fake", + Form: map[string][]string{}, + }, + + "abcdef\n", + }, + + // Tests that we don't parse a path that looks like a + // scheme-relative URI as a scheme-relative URI. + { + "GET //user@host/is/actually/a/path/ HTTP/1.1\r\n" + + "Host: test\r\n\r\n", + + Request{ + Method: "GET", + RawURL: "//user@host/is/actually/a/path/", + URL: &URL{ + Raw: "//user@host/is/actually/a/path/", + Scheme: "", + RawPath: "//user@host/is/actually/a/path/", + RawAuthority: "", + RawUserinfo: "", + Host: "", + Path: "//user@host/is/actually/a/path/", + RawQuery: "", + Fragment: "", + }, + Proto: "HTTP/1.1", + ProtoMajor: 1, + ProtoMinor: 1, + Header: map[string]string{}, + Close: false, + ContentLength: -1, + Host: "test", + Referer: "", + UserAgent: "", + Form: map[string][]string{}, + }, + + "", + }, +} + +func TestReadRequest(t *testing.T) { + for i := range reqTests { + tt := &reqTests[i] + var braw bytes.Buffer + braw.WriteString(tt.Raw) + req, err := ReadRequest(bufio.NewReader(&braw)) + if err != nil { + t.Errorf("#%d: %s", i, err) + continue + } + rbody := req.Body + req.Body = nil + diff(t, fmt.Sprintf("#%d Request", i), req, &tt.Req) + var bout bytes.Buffer + if rbody != nil { + io.Copy(&bout, rbody) + rbody.Close() + } + body := bout.String() + if body != tt.Body { + t.Errorf("#%d: Body = %q want %q", i, body, tt.Body) + } + } +} diff --git a/libgo/go/http/request.go b/libgo/go/http/request.go new file mode 100644 index 000000000..04bebaaf5 --- /dev/null +++ b/libgo/go/http/request.go @@ -0,0 +1,693 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// HTTP Request reading and parsing. + +// The http package implements parsing of HTTP requests, replies, +// and URLs and provides an extensible HTTP server and a basic +// HTTP client. 
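+//
+// A minimal server, as a sketch (pattern, handler body, and address are
+// illustrative):
+//
+//	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+//		io.WriteString(w, "hello\n")
+//	})
+//	http.ListenAndServe(":8080", nil)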
+package http + +import ( + "bufio" + "bytes" + "container/vector" + "fmt" + "io" + "io/ioutil" + "mime" + "mime/multipart" + "os" + "strconv" + "strings" +) + +const ( + maxLineLength = 4096 // assumed <= bufio.defaultBufSize + maxValueLength = 4096 + maxHeaderLines = 1024 + chunkSize = 4 << 10 // 4 KB chunks +) + +// HTTP request parsing errors. +type ProtocolError struct { + os.ErrorString +} + +var ( + ErrLineTooLong = &ProtocolError{"header line too long"} + ErrHeaderTooLong = &ProtocolError{"header too long"} + ErrShortBody = &ProtocolError{"entity body too short"} + ErrNotSupported = &ProtocolError{"feature not supported"} + ErrUnexpectedTrailer = &ProtocolError{"trailer header without chunked transfer encoding"} + ErrMissingContentLength = &ProtocolError{"missing ContentLength in HEAD response"} + ErrNotMultipart = &ProtocolError{"request Content-Type isn't multipart/form-data"} + ErrMissingBoundary = &ProtocolError{"no multipart boundary param Content-Type"} +) + +type badStringError struct { + what string + str string +} + +func (e *badStringError) String() string { return fmt.Sprintf("%s %q", e.what, e.str) } + +var reqExcludeHeader = map[string]bool{ + "Host": true, + "User-Agent": true, + "Referer": true, + "Content-Length": true, + "Transfer-Encoding": true, + "Trailer": true, +} + +// A Request represents a parsed HTTP request header. +type Request struct { + Method string // GET, POST, PUT, etc. + RawURL string // The raw URL given in the request. + URL *URL // Parsed URL. + Proto string // "HTTP/1.0" + ProtoMajor int // 1 + ProtoMinor int // 0 + + // A header maps request lines to their values. + // If the header says + // + // accept-encoding: gzip, deflate + // Accept-Language: en-us + // Connection: keep-alive + // + // then + // + // Header = map[string]string{ + // "Accept-Encoding": "gzip, deflate", + // "Accept-Language": "en-us", + // "Connection": "keep-alive", + // } + // + // HTTP defines that header names are case-insensitive. + // The request parser implements this by canonicalizing the + // name, making the first character and any characters + // following a hyphen uppercase and the rest lowercase. + Header map[string]string + + // The message body. + Body io.ReadCloser + + // ContentLength records the length of the associated content. + // The value -1 indicates that the length is unknown. + // Values >= 0 indicate that the given number of bytes may be read from Body. + ContentLength int64 + + // TransferEncoding lists the transfer encodings from outermost to innermost. + // An empty list denotes the "identity" encoding. + TransferEncoding []string + + // Whether to close the connection after replying to this request. + Close bool + + // The host on which the URL is sought. + // Per RFC 2616, this is either the value of the Host: header + // or the host name given in the URL itself. + Host string + + // The referring URL, if sent in the request. + // + // Referer is misspelled as in the request itself, + // a mistake from the earliest days of HTTP. + // This value can also be fetched from the Header map + // as Header["Referer"]; the benefit of making it + // available as a structure field is that the compiler + // can diagnose programs that use the alternate + // (correct English) spelling req.Referrer but cannot + // diagnose programs that use Header["Referrer"]. + Referer string + + // The User-Agent: header string, if sent in the request. + UserAgent string + + // The parsed form. Only available after ParseForm is called. 
+ Form map[string][]string + + // Trailer maps trailer keys to values. Like for Header, if the + // response has multiple trailer lines with the same key, they will be + // concatenated, delimited by commas. + Trailer map[string]string +} + +// ProtoAtLeast returns whether the HTTP protocol used +// in the request is at least major.minor. +func (r *Request) ProtoAtLeast(major, minor int) bool { + return r.ProtoMajor > major || + r.ProtoMajor == major && r.ProtoMinor >= minor +} + +// MultipartReader returns a MIME multipart reader if this is a +// multipart/form-data POST request, else returns nil and an error. +func (r *Request) MultipartReader() (multipart.Reader, os.Error) { + v, ok := r.Header["Content-Type"] + if !ok { + return nil, ErrNotMultipart + } + d, params := mime.ParseMediaType(v) + if d != "multipart/form-data" { + return nil, ErrNotMultipart + } + boundary, ok := params["boundary"] + if !ok { + return nil, ErrMissingBoundary + } + return multipart.NewReader(r.Body, boundary), nil +} + +// Return value if nonempty, def otherwise. +func valueOrDefault(value, def string) string { + if value != "" { + return value + } + return def +} + +const defaultUserAgent = "Go http package" + +// Write writes an HTTP/1.1 request -- header and body -- in wire format. +// This method consults the following fields of req: +// Host +// RawURL, if non-empty, or else URL +// Method (defaults to "GET") +// UserAgent (defaults to defaultUserAgent) +// Referer +// Header +// Body +// +// If Body is present, Write forces "Transfer-Encoding: chunked" as a header +// and then closes Body when finished sending it. +func (req *Request) Write(w io.Writer) os.Error { + host := req.Host + if host == "" { + host = req.URL.Host + } + + uri := req.RawURL + if uri == "" { + uri = valueOrDefault(urlEscape(req.URL.Path, encodePath), "/") + if req.URL.RawQuery != "" { + uri += "?" + req.URL.RawQuery + } + } + + fmt.Fprintf(w, "%s %s HTTP/1.1\r\n", valueOrDefault(req.Method, "GET"), uri) + + // Header lines + fmt.Fprintf(w, "Host: %s\r\n", host) + fmt.Fprintf(w, "User-Agent: %s\r\n", valueOrDefault(req.UserAgent, defaultUserAgent)) + if req.Referer != "" { + fmt.Fprintf(w, "Referer: %s\r\n", req.Referer) + } + + // Process Body,ContentLength,Close,Trailer + tw, err := newTransferWriter(req) + if err != nil { + return err + } + err = tw.WriteHeader(w) + if err != nil { + return err + } + + // TODO: split long values? (If so, should share code with Conn.Write) + // TODO: if Header includes values for Host, User-Agent, or Referer, this + // may conflict with the User-Agent or Referer headers we add manually. + // One solution would be to remove the Host, UserAgent, and Referer fields + // from Request, and introduce Request methods along the lines of + // Response.{GetHeader,AddHeader} and string constants for "Host", + // "User-Agent" and "Referer". + err = writeSortedKeyValue(w, req.Header, reqExcludeHeader) + if err != nil { + return err + } + + io.WriteString(w, "\r\n") + + // Write body and trailer + err = tw.WriteBody(w) + if err != nil { + return err + } + + return nil +} + +// Read a line of bytes (up to \n) from b. +// Give up if the line exceeds maxLineLength. +// The returned bytes are a pointer into storage in +// the bufio, so they are only valid until the next bufio read. +func readLineBytes(b *bufio.Reader) (p []byte, err os.Error) { + if p, err = b.ReadSlice('\n'); err != nil { + // We always know when EOF is coming. + // If the caller asked for a line, there should be a line. 
+ if err == os.EOF { + err = io.ErrUnexpectedEOF + } else if err == bufio.ErrBufferFull { + err = ErrLineTooLong + } + return nil, err + } + if len(p) >= maxLineLength { + return nil, ErrLineTooLong + } + + // Chop off trailing white space. + var i int + for i = len(p); i > 0; i-- { + if c := p[i-1]; c != ' ' && c != '\r' && c != '\t' && c != '\n' { + break + } + } + return p[0:i], nil +} + +// readLineBytes, but convert the bytes into a string. +func readLine(b *bufio.Reader) (s string, err os.Error) { + p, e := readLineBytes(b) + if e != nil { + return "", e + } + return string(p), nil +} + +var colon = []byte{':'} + +// Read a key/value pair from b. +// A key/value has the form Key: Value\r\n +// and the Value can continue on multiple lines if each continuation line +// starts with a space. +func readKeyValue(b *bufio.Reader) (key, value string, err os.Error) { + line, e := readLineBytes(b) + if e != nil { + return "", "", e + } + if len(line) == 0 { + return "", "", nil + } + + // Scan first line for colon. + i := bytes.Index(line, colon) + if i < 0 { + goto Malformed + } + + key = string(line[0:i]) + if strings.Contains(key, " ") { + // Key field has space - no good. + goto Malformed + } + + // Skip initial space before value. + for i++; i < len(line); i++ { + if line[i] != ' ' { + break + } + } + value = string(line[i:]) + + // Look for extension lines, which must begin with space. + for { + c, e := b.ReadByte() + if c != ' ' { + if e != os.EOF { + b.UnreadByte() + } + break + } + + // Eat leading space. + for c == ' ' { + if c, e = b.ReadByte(); e != nil { + if e == os.EOF { + e = io.ErrUnexpectedEOF + } + return "", "", e + } + } + b.UnreadByte() + + // Read the rest of the line and add to value. + if line, e = readLineBytes(b); e != nil { + return "", "", e + } + value += " " + string(line) + + if len(value) >= maxValueLength { + return "", "", &badStringError{"value too long for key", key} + } + } + return key, value, nil + +Malformed: + return "", "", &badStringError{"malformed header line", string(line)} +} + +// Convert decimal at s[i:len(s)] to integer, +// returning value, string position where the digits stopped, +// and whether there was a valid number (digits, not too big). +func atoi(s string, i int) (n, i1 int, ok bool) { + const Big = 1000000 + if i >= len(s) || s[i] < '0' || s[i] > '9' { + return 0, 0, false + } + n = 0 + for ; i < len(s) && '0' <= s[i] && s[i] <= '9'; i++ { + n = n*10 + int(s[i]-'0') + if n > Big { + return 0, 0, false + } + } + return n, i, true +} + +// Parse HTTP version: "HTTP/1.2" -> (1, 2, true). +func parseHTTPVersion(vers string) (int, int, bool) { + if len(vers) < 5 || vers[0:5] != "HTTP/" { + return 0, 0, false + } + major, i, ok := atoi(vers, 5) + if !ok || i >= len(vers) || vers[i] != '.' { + return 0, 0, false + } + var minor int + minor, i, ok = atoi(vers, i+1) + if !ok || i != len(vers) { + return 0, 0, false + } + return major, minor, true +} + +// CanonicalHeaderKey returns the canonical format of the +// HTTP header key s. The canonicalization converts the first +// letter and any letter following a hyphen to upper case; +// the rest are converted to lowercase. For example, the +// canonical key for "accept-encoding" is "Accept-Encoding". +func CanonicalHeaderKey(s string) string { + // canonicalize: first letter upper case + // and upper case after each dash. + // (Host, User-Agent, If-Modified-Since). + // HTTP headers are ASCII only, so no Unicode issues. 
+ var a []byte + upper := true + for i := 0; i < len(s); i++ { + v := s[i] + if upper && 'a' <= v && v <= 'z' { + if a == nil { + a = []byte(s) + } + a[i] = v + 'A' - 'a' + } + if !upper && 'A' <= v && v <= 'Z' { + if a == nil { + a = []byte(s) + } + a[i] = v + 'a' - 'A' + } + upper = false + if v == '-' { + upper = true + } + } + if a != nil { + return string(a) + } + return s +} + +type chunkedReader struct { + r *bufio.Reader + n uint64 // unread bytes in chunk + err os.Error +} + +func newChunkedReader(r *bufio.Reader) *chunkedReader { + return &chunkedReader{r: r} +} + +func (cr *chunkedReader) beginChunk() { + // chunk-size CRLF + var line string + line, cr.err = readLine(cr.r) + if cr.err != nil { + return + } + cr.n, cr.err = strconv.Btoui64(line, 16) + if cr.err != nil { + return + } + if cr.n == 0 { + // trailer CRLF + for { + line, cr.err = readLine(cr.r) + if cr.err != nil { + return + } + if line == "" { + break + } + } + cr.err = os.EOF + } +} + +func (cr *chunkedReader) Read(b []uint8) (n int, err os.Error) { + if cr.err != nil { + return 0, cr.err + } + if cr.n == 0 { + cr.beginChunk() + if cr.err != nil { + return 0, cr.err + } + } + if uint64(len(b)) > cr.n { + b = b[0:cr.n] + } + n, cr.err = cr.r.Read(b) + cr.n -= uint64(n) + if cr.n == 0 && cr.err == nil { + // end of chunk (CRLF) + b := make([]byte, 2) + if _, cr.err = io.ReadFull(cr.r, b); cr.err == nil { + if b[0] != '\r' || b[1] != '\n' { + cr.err = os.NewError("malformed chunked encoding") + } + } + } + return n, cr.err +} + +// ReadRequest reads and parses a request from b. +func ReadRequest(b *bufio.Reader) (req *Request, err os.Error) { + req = new(Request) + + // First line: GET /index.html HTTP/1.0 + var s string + if s, err = readLine(b); err != nil { + return nil, err + } + + var f []string + if f = strings.Split(s, " ", 3); len(f) < 3 { + return nil, &badStringError{"malformed HTTP request", s} + } + req.Method, req.RawURL, req.Proto = f[0], f[1], f[2] + var ok bool + if req.ProtoMajor, req.ProtoMinor, ok = parseHTTPVersion(req.Proto); !ok { + return nil, &badStringError{"malformed HTTP version", req.Proto} + } + + if req.URL, err = ParseRequestURL(req.RawURL); err != nil { + return nil, err + } + + // Subsequent lines: Key: value. + nheader := 0 + req.Header = make(map[string]string) + for { + var key, value string + if key, value, err = readKeyValue(b); err != nil { + return nil, err + } + if key == "" { + break + } + if nheader++; nheader >= maxHeaderLines { + return nil, ErrHeaderTooLong + } + + key = CanonicalHeaderKey(key) + + // RFC 2616 says that if you send the same header key + // multiple times, it has to be semantically equivalent + // to concatenating the values separated by commas. + oldvalue, present := req.Header[key] + if present { + req.Header[key] = oldvalue + "," + value + } else { + req.Header[key] = value + } + } + + // RFC2616: Must treat + // GET /index.html HTTP/1.1 + // Host: www.google.com + // and + // GET http://www.google.com/index.html HTTP/1.1 + // Host: doesntmatter + // the same. In the second case, any Host line is ignored. + req.Host = req.URL.Host + if req.Host == "" { + req.Host = req.Header["Host"] + } + req.Header["Host"] = "", false + + fixPragmaCacheControl(req.Header) + + // Pull out useful fields as a convenience to clients. 
+ req.Referer = req.Header["Referer"] + req.Header["Referer"] = "", false + + req.UserAgent = req.Header["User-Agent"] + req.Header["User-Agent"] = "", false + + // TODO: Parse specific header values: + // Accept + // Accept-Encoding + // Accept-Language + // Authorization + // Cache-Control + // Connection + // Date + // Expect + // From + // If-Match + // If-Modified-Since + // If-None-Match + // If-Range + // If-Unmodified-Since + // Max-Forwards + // Proxy-Authorization + // Referer [sic] + // TE (transfer-codings) + // Trailer + // Transfer-Encoding + // Upgrade + // User-Agent + // Via + // Warning + + err = readTransfer(req, b) + if err != nil { + return nil, err + } + + return req, nil +} + +// ParseQuery parses the URL-encoded query string and returns +// a map listing the values specified for each key. +// ParseQuery always returns a non-nil map containing all the +// valid query parameters found; err describes the first decoding error +// encountered, if any. +func ParseQuery(query string) (m map[string][]string, err os.Error) { + m = make(map[string][]string) + err = parseQuery(m, query) + return +} + +func parseQuery(m map[string][]string, query string) (err os.Error) { + for _, kv := range strings.Split(query, "&", -1) { + if len(kv) == 0 { + continue + } + kvPair := strings.Split(kv, "=", 2) + + var key, value string + var e os.Error + key, e = URLUnescape(kvPair[0]) + if e == nil && len(kvPair) > 1 { + value, e = URLUnescape(kvPair[1]) + } + if e != nil { + err = e + continue + } + vec := vector.StringVector(m[key]) + vec.Push(value) + m[key] = vec + } + return err +} + +// ParseForm parses the request body as a form for POST requests, or the raw query for GET requests. +// It is idempotent. +func (r *Request) ParseForm() (err os.Error) { + if r.Form != nil { + return + } + + r.Form = make(map[string][]string) + if r.URL != nil { + err = parseQuery(r.Form, r.URL.RawQuery) + } + if r.Method == "POST" { + if r.Body == nil { + return os.ErrorString("missing form body") + } + ct := r.Header["Content-Type"] + switch strings.Split(ct, ";", 2)[0] { + case "text/plain", "application/x-www-form-urlencoded", "": + b, e := ioutil.ReadAll(r.Body) + if e != nil { + if err == nil { + err = e + } + break + } + e = parseQuery(r.Form, string(b)) + if err == nil { + err = e + } + // TODO(dsymonds): Handle multipart/form-data + default: + return &badStringError{"unknown Content-Type", ct} + } + } + return err +} + +// FormValue returns the first value for the named component of the query. +// FormValue calls ParseForm if necessary. +func (r *Request) FormValue(key string) string { + if r.Form == nil { + r.ParseForm() + } + if vs := r.Form[key]; len(vs) > 0 { + return vs[0] + } + return "" +} + +func (r *Request) expectsContinue() bool { + expectation, ok := r.Header["Expect"] + return ok && strings.ToLower(expectation) == "100-continue" +} + +func (r *Request) wantsHttp10KeepAlive() bool { + if r.ProtoMajor != 1 || r.ProtoMinor != 0 { + return false + } + value, exists := r.Header["Connection"] + if !exists { + return false + } + return strings.Contains(strings.ToLower(value), "keep-alive") +} diff --git a/libgo/go/http/request_test.go b/libgo/go/http/request_test.go new file mode 100644 index 000000000..d25e5e5e7 --- /dev/null +++ b/libgo/go/http/request_test.go @@ -0,0 +1,155 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
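+
+// The tests below exercise the query and form parsing in request.go.
+// A quick sketch of the expected ParseQuery behavior (mirroring the
+// second parseTests case):
+//
+// m, err := ParseQuery("a=1&a=2&a=banana")
+// // err == nil, m["a"] == []string{"1", "2", "banana"}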
+ +package http + +import ( + "bytes" + "reflect" + "regexp" + "strings" + "testing" +) + +type stringMultimap map[string][]string + +type parseTest struct { + query string + out stringMultimap +} + +var parseTests = []parseTest{ + { + query: "a=1&b=2", + out: stringMultimap{"a": []string{"1"}, "b": []string{"2"}}, + }, + { + query: "a=1&a=2&a=banana", + out: stringMultimap{"a": []string{"1", "2", "banana"}}, + }, + { + query: "ascii=%3Ckey%3A+0x90%3E", + out: stringMultimap{"ascii": []string{"<key: 0x90>"}}, + }, +} + +func TestParseForm(t *testing.T) { + for i, test := range parseTests { + form, err := ParseQuery(test.query) + if err != nil { + t.Errorf("test %d: Unexpected error: %v", i, err) + continue + } + if len(form) != len(test.out) { + t.Errorf("test %d: len(form) = %d, want %d", i, len(form), len(test.out)) + } + for k, evs := range test.out { + vs, ok := form[k] + if !ok { + t.Errorf("test %d: Missing key %q", i, k) + continue + } + if len(vs) != len(evs) { + t.Errorf("test %d: len(form[%q]) = %d, want %d", i, k, len(vs), len(evs)) + continue + } + for j, ev := range evs { + if v := vs[j]; v != ev { + t.Errorf("test %d: form[%q][%d] = %q, want %q", i, k, j, v, ev) + } + } + } + } +} + +func TestQuery(t *testing.T) { + req := &Request{Method: "GET"} + req.URL, _ = ParseURL("http://www.google.com/search?q=foo&q=bar") + if q := req.FormValue("q"); q != "foo" { + t.Errorf(`req.FormValue("q") = %q, want "foo"`, q) + } +} + +func TestPostQuery(t *testing.T) { + req := &Request{Method: "POST"} + req.URL, _ = ParseURL("http://www.google.com/search?q=foo&q=bar&both=x") + req.Header = map[string]string{"Content-Type": "application/x-www-form-urlencoded; boo!"} + req.Body = nopCloser{strings.NewReader("z=post&both=y")} + if q := req.FormValue("q"); q != "foo" { + t.Errorf(`req.FormValue("q") = %q, want "foo"`, q) + } + if z := req.FormValue("z"); z != "post" { + t.Errorf(`req.FormValue("z") = %q, want "post"`, z) + } + if both := req.Form["both"]; !reflect.DeepEqual(both, []string{"x", "y"}) { + t.Errorf(`req.FormValue("both") = %q, want ["x", "y"]`, both) + } +} + +type stringMap map[string]string +type parseContentTypeTest struct { + contentType stringMap + error bool +} + +var parseContentTypeTests = []parseContentTypeTest{ + {contentType: stringMap{"Content-Type": "text/plain"}}, + {contentType: stringMap{"Content-Type": ""}}, + {contentType: stringMap{"Content-Type": "text/plain; boundary="}}, + { + contentType: stringMap{"Content-Type": "application/unknown"}, + error: true, + }, +} + +func TestPostContentTypeParsing(t *testing.T) { + for i, test := range parseContentTypeTests { + req := &Request{ + Method: "POST", + Header: test.contentType, + Body: nopCloser{bytes.NewBufferString("body")}, + } + err := req.ParseForm() + if !test.error && err != nil { + t.Errorf("test %d: Unexpected error: %v", i, err) + } + if test.error && err == nil { + t.Errorf("test %d should have returned error", i) + } + } +} + +func TestMultipartReader(t *testing.T) { + req := &Request{ + Method: "POST", + Header: stringMap{"Content-Type": `multipart/form-data; boundary="foo123"`}, + Body: nopCloser{new(bytes.Buffer)}, + } + multipart, err := req.MultipartReader() + if multipart == nil { + t.Errorf("expected multipart; error: %v", err) + } + + req.Header = stringMap{"Content-Type": "text/plain"} + multipart, err = req.MultipartReader() + if multipart != nil { + t.Errorf("unexpected multipart for text/plain") + } +} + +func TestRedirect(t *testing.T) { + const ( + start = "http://google.com/" + endRe = 
"^http://www\\.google\\.[a-z.]+/$" + ) + var end = regexp.MustCompile(endRe) + r, url, err := Get(start) + if err != nil { + t.Fatal(err) + } + r.Body.Close() + if r.StatusCode != 200 || !end.MatchString(url) { + t.Fatalf("Get(%s) got status %d at %q, want 200 matching %q", start, r.StatusCode, url, endRe) + } +} diff --git a/libgo/go/http/requestwrite_test.go b/libgo/go/http/requestwrite_test.go new file mode 100644 index 000000000..3ceabe4ee --- /dev/null +++ b/libgo/go/http/requestwrite_test.go @@ -0,0 +1,139 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package http + +import ( + "bytes" + "testing" +) + +type reqWriteTest struct { + Req Request + Raw string +} + +var reqWriteTests = []reqWriteTest{ + // HTTP/1.1 => chunked coding; no body; no trailer + { + Request{ + Method: "GET", + RawURL: "http://www.techcrunch.com/", + URL: &URL{ + Raw: "http://www.techcrunch.com/", + Scheme: "http", + RawPath: "http://www.techcrunch.com/", + RawAuthority: "www.techcrunch.com", + RawUserinfo: "", + Host: "www.techcrunch.com", + Path: "/", + RawQuery: "", + Fragment: "", + }, + Proto: "HTTP/1.1", + ProtoMajor: 1, + ProtoMinor: 1, + Header: map[string]string{ + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.7", + "Accept-Encoding": "gzip,deflate", + "Accept-Language": "en-us,en;q=0.5", + "Keep-Alive": "300", + "Proxy-Connection": "keep-alive", + }, + Body: nil, + Close: false, + Host: "www.techcrunch.com", + Referer: "", + UserAgent: "Fake", + Form: map[string][]string{}, + }, + + "GET http://www.techcrunch.com/ HTTP/1.1\r\n" + + "Host: www.techcrunch.com\r\n" + + "User-Agent: Fake\r\n" + + "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\r\n" + + "Accept-Encoding: gzip,deflate\r\n" + + "Accept-Language: en-us,en;q=0.5\r\n" + + "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n" + + "Keep-Alive: 300\r\n" + + "Proxy-Connection: keep-alive\r\n\r\n", + }, + // HTTP/1.1 => chunked coding; body; empty trailer + { + Request{ + Method: "GET", + URL: &URL{ + Scheme: "http", + Host: "www.google.com", + Path: "/search", + }, + ProtoMajor: 1, + ProtoMinor: 1, + Header: map[string]string{}, + Body: nopCloser{bytes.NewBufferString("abcdef")}, + TransferEncoding: []string{"chunked"}, + }, + + "GET /search HTTP/1.1\r\n" + + "Host: www.google.com\r\n" + + "User-Agent: Go http package\r\n" + + "Transfer-Encoding: chunked\r\n\r\n" + + "6\r\nabcdef\r\n0\r\n\r\n", + }, + // HTTP/1.1 POST => chunked coding; body; empty trailer + { + Request{ + Method: "POST", + URL: &URL{ + Scheme: "http", + Host: "www.google.com", + Path: "/search", + }, + ProtoMajor: 1, + ProtoMinor: 1, + Header: map[string]string{}, + Close: true, + Body: nopCloser{bytes.NewBufferString("abcdef")}, + TransferEncoding: []string{"chunked"}, + }, + + "POST /search HTTP/1.1\r\n" + + "Host: www.google.com\r\n" + + "User-Agent: Go http package\r\n" + + "Connection: close\r\n" + + "Transfer-Encoding: chunked\r\n\r\n" + + "6\r\nabcdef\r\n0\r\n\r\n", + }, + // default to HTTP/1.1 + { + Request{ + Method: "GET", + RawURL: "/search", + Host: "www.google.com", + }, + + "GET /search HTTP/1.1\r\n" + + "Host: www.google.com\r\n" + + "User-Agent: Go http package\r\n" + + "\r\n", + }, +} + +func TestRequestWrite(t *testing.T) { + for i := range reqWriteTests { + tt := &reqWriteTests[i] + var braw bytes.Buffer + err := tt.Req.Write(&braw) + if 
err != nil { + t.Errorf("error writing #%d: %s", i, err) + continue + } + sraw := braw.String() + if sraw != tt.Raw { + t.Errorf("Test %d, expecting:\n%s\nGot:\n%s\n", i, tt.Raw, sraw) + continue + } + } +} diff --git a/libgo/go/http/response.go b/libgo/go/http/response.go new file mode 100644 index 000000000..a24726110 --- /dev/null +++ b/libgo/go/http/response.go @@ -0,0 +1,251 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// HTTP Response reading and parsing. + +package http + +import ( + "bufio" + "fmt" + "io" + "os" + "sort" + "strconv" + "strings" +) + +var respExcludeHeader = map[string]bool{ + "Content-Length": true, + "Transfer-Encoding": true, + "Trailer": true, +} + +// Response represents the response from an HTTP request. +// +type Response struct { + Status string // e.g. "200 OK" + StatusCode int // e.g. 200 + Proto string // e.g. "HTTP/1.0" + ProtoMajor int // e.g. 1 + ProtoMinor int // e.g. 0 + + // RequestMethod records the method used in the HTTP request. + // Header fields such as Content-Length have method-specific meaning. + RequestMethod string // e.g. "HEAD", "CONNECT", "GET", etc. + + // Header maps header keys to values. If the response had multiple + // headers with the same key, they will be concatenated, with comma + // delimiters. (Section 4.2 of RFC 2616 requires that multiple headers + // be semantically equivalent to a comma-delimited sequence.) Values + // duplicated by other fields in this struct (e.g., ContentLength) are + // omitted from Header. + // + // Keys in the map are canonicalized (see CanonicalHeaderKey). + Header map[string]string + + // Body represents the response body. + Body io.ReadCloser + + // ContentLength records the length of the associated content. The + // value -1 indicates that the length is unknown. Unless RequestMethod + // is "HEAD", values >= 0 indicate that the given number of bytes may + // be read from Body. + ContentLength int64 + + // Contains transfer encodings from outer-most to inner-most. Value is + // nil, means that "identity" encoding is used. + TransferEncoding []string + + // Close records whether the header directed that the connection be + // closed after reading Body. The value is advice for clients: neither + // ReadResponse nor Response.Write ever closes a connection. + Close bool + + // Trailer maps trailer keys to values. Like for Header, if the + // response has multiple trailer lines with the same key, they will be + // concatenated, delimited by commas. + Trailer map[string]string +} + +// ReadResponse reads and returns an HTTP response from r. The RequestMethod +// parameter specifies the method used in the corresponding request (e.g., +// "GET", "HEAD"). Clients must call resp.Body.Close when finished reading +// resp.Body. After that call, clients can inspect resp.Trailer to find +// key/value pairs included in the response trailer. +func ReadResponse(r *bufio.Reader, requestMethod string) (resp *Response, err os.Error) { + + resp = new(Response) + + resp.RequestMethod = strings.ToUpper(requestMethod) + + // Parse the first line of the response. 
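+ // For example, the status line "HTTP/1.0 200 OK" yields
+ // Proto "HTTP/1.0", StatusCode 200, and Status "200 OK".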
+ line, err := readLine(r) + if err != nil { + return nil, err + } + f := strings.Split(line, " ", 3) + if len(f) < 2 { + return nil, &badStringError{"malformed HTTP response", line} + } + reasonPhrase := "" + if len(f) > 2 { + reasonPhrase = f[2] + } + resp.Status = f[1] + " " + reasonPhrase + resp.StatusCode, err = strconv.Atoi(f[1]) + if err != nil { + return nil, &badStringError{"malformed HTTP status code", f[1]} + } + + resp.Proto = f[0] + var ok bool + if resp.ProtoMajor, resp.ProtoMinor, ok = parseHTTPVersion(resp.Proto); !ok { + return nil, &badStringError{"malformed HTTP version", resp.Proto} + } + + // Parse the response headers. + nheader := 0 + resp.Header = make(map[string]string) + for { + key, value, err := readKeyValue(r) + if err != nil { + return nil, err + } + if key == "" { + break // end of response header + } + if nheader++; nheader >= maxHeaderLines { + return nil, ErrHeaderTooLong + } + resp.AddHeader(key, value) + } + + fixPragmaCacheControl(resp.Header) + + err = readTransfer(resp, r) + if err != nil { + return nil, err + } + + return resp, nil +} + +// RFC2616: Should treat +// Pragma: no-cache +// like +// Cache-Control: no-cache +func fixPragmaCacheControl(header map[string]string) { + if header["Pragma"] == "no-cache" { + if _, presentcc := header["Cache-Control"]; !presentcc { + header["Cache-Control"] = "no-cache" + } + } +} + +// AddHeader adds a value under the given key. Keys are not case sensitive. +func (r *Response) AddHeader(key, value string) { + key = CanonicalHeaderKey(key) + + oldValues, oldValuesPresent := r.Header[key] + if oldValuesPresent { + r.Header[key] = oldValues + "," + value + } else { + r.Header[key] = value + } +} + +// GetHeader returns the value of the response header with the given key. +// If there were multiple headers with this key, their values are concatenated, +// with a comma delimiter. If there were no response headers with the given +// key, GetHeader returns an empty string. Keys are not case sensitive. +func (r *Response) GetHeader(key string) (value string) { + return r.Header[CanonicalHeaderKey(key)] +} + +// ProtoAtLeast returns whether the HTTP protocol used +// in the response is at least major.minor. +func (r *Response) ProtoAtLeast(major, minor int) bool { + return r.ProtoMajor > major || + r.ProtoMajor == major && r.ProtoMinor >= minor +} + +// Writes the response (header, body and trailer) in wire format. 
This method +// consults the following fields of resp: +// +// StatusCode +// ProtoMajor +// ProtoMinor +// RequestMethod +// TransferEncoding +// Trailer +// Body +// ContentLength +// Header, values for non-canonical keys will have unpredictable behavior +// +func (resp *Response) Write(w io.Writer) os.Error { + + // RequestMethod should be upper-case + resp.RequestMethod = strings.ToUpper(resp.RequestMethod) + + // Status line + text := resp.Status + if text == "" { + var ok bool + text, ok = statusText[resp.StatusCode] + if !ok { + text = "status code " + strconv.Itoa(resp.StatusCode) + } + } + io.WriteString(w, "HTTP/"+strconv.Itoa(resp.ProtoMajor)+".") + io.WriteString(w, strconv.Itoa(resp.ProtoMinor)+" ") + io.WriteString(w, strconv.Itoa(resp.StatusCode)+" "+text+"\r\n") + + // Process Body,ContentLength,Close,Trailer + tw, err := newTransferWriter(resp) + if err != nil { + return err + } + err = tw.WriteHeader(w) + if err != nil { + return err + } + + // Rest of header + err = writeSortedKeyValue(w, resp.Header, respExcludeHeader) + if err != nil { + return err + } + + // End-of-header + io.WriteString(w, "\r\n") + + // Write body and trailer + err = tw.WriteBody(w) + if err != nil { + return err + } + + // Success + return nil +} + +func writeSortedKeyValue(w io.Writer, kvm map[string]string, exclude map[string]bool) os.Error { + kva := make([]string, len(kvm)) + i := 0 + for k, v := range kvm { + if !exclude[k] { + kva[i] = fmt.Sprint(k + ": " + v + "\r\n") + i++ + } + } + kva = kva[0:i] + sort.SortStrings(kva) + for _, l := range kva { + if _, err := io.WriteString(w, l); err != nil { + return err + } + } + return nil +} diff --git a/libgo/go/http/response_test.go b/libgo/go/http/response_test.go new file mode 100644 index 000000000..89a8c3b44 --- /dev/null +++ b/libgo/go/http/response_test.go @@ -0,0 +1,203 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package http + +import ( + "bufio" + "bytes" + "fmt" + "io" + "reflect" + "testing" +) + +type respTest struct { + Raw string + Resp Response + Body string +} + +var respTests = []respTest{ + // Unchunked response without Content-Length. + { + "HTTP/1.0 200 OK\r\n" + + "Connection: close\r\n" + + "\r\n" + + "Body here\n", + + Response{ + Status: "200 OK", + StatusCode: 200, + Proto: "HTTP/1.0", + ProtoMajor: 1, + ProtoMinor: 0, + RequestMethod: "GET", + Header: map[string]string{ + "Connection": "close", // TODO(rsc): Delete? + }, + Close: true, + ContentLength: -1, + }, + + "Body here\n", + }, + + // Unchunked response with Content-Length. + { + "HTTP/1.0 200 OK\r\n" + + "Content-Length: 10\r\n" + + "Connection: close\r\n" + + "\r\n" + + "Body here\n", + + Response{ + Status: "200 OK", + StatusCode: 200, + Proto: "HTTP/1.0", + ProtoMajor: 1, + ProtoMinor: 0, + RequestMethod: "GET", + Header: map[string]string{ + "Connection": "close", // TODO(rsc): Delete? + "Content-Length": "10", // TODO(rsc): Delete? + }, + Close: true, + ContentLength: 10, + }, + + "Body here\n", + }, + + // Chunked response without Content-Length. 
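+ // (In the raw response below, "0a" is the chunk size in hex:
+ // ten bytes of data, followed by a terminating zero-length chunk.)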
+ { + "HTTP/1.0 200 OK\r\n" + + "Transfer-Encoding: chunked\r\n" + + "\r\n" + + "0a\r\n" + + "Body here\n" + + "0\r\n" + + "\r\n", + + Response{ + Status: "200 OK", + StatusCode: 200, + Proto: "HTTP/1.0", + ProtoMajor: 1, + ProtoMinor: 0, + RequestMethod: "GET", + Header: map[string]string{}, + Close: true, + ContentLength: -1, + TransferEncoding: []string{"chunked"}, + }, + + "Body here\n", + }, + + // Chunked response with Content-Length. + { + "HTTP/1.0 200 OK\r\n" + + "Transfer-Encoding: chunked\r\n" + + "Content-Length: 10\r\n" + + "\r\n" + + "0a\r\n" + + "Body here\n" + + "0\r\n" + + "\r\n", + + Response{ + Status: "200 OK", + StatusCode: 200, + Proto: "HTTP/1.0", + ProtoMajor: 1, + ProtoMinor: 0, + RequestMethod: "GET", + Header: map[string]string{}, + Close: true, + ContentLength: -1, // TODO(rsc): Fix? + TransferEncoding: []string{"chunked"}, + }, + + "Body here\n", + }, + + // Status line without a Reason-Phrase, but trailing space. + // (permitted by RFC 2616) + { + "HTTP/1.0 303 \r\n\r\n", + Response{ + Status: "303 ", + StatusCode: 303, + Proto: "HTTP/1.0", + ProtoMajor: 1, + ProtoMinor: 0, + RequestMethod: "GET", + Header: map[string]string{}, + Close: true, + ContentLength: -1, + }, + + "", + }, + + // Status line without a Reason-Phrase, and no trailing space. + // (not permitted by RFC 2616, but we'll accept it anyway) + { + "HTTP/1.0 303\r\n\r\n", + Response{ + Status: "303 ", + StatusCode: 303, + Proto: "HTTP/1.0", + ProtoMajor: 1, + ProtoMinor: 0, + RequestMethod: "GET", + Header: map[string]string{}, + Close: true, + ContentLength: -1, + }, + + "", + }, +} + +func TestReadResponse(t *testing.T) { + for i := range respTests { + tt := &respTests[i] + var braw bytes.Buffer + braw.WriteString(tt.Raw) + resp, err := ReadResponse(bufio.NewReader(&braw), tt.Resp.RequestMethod) + if err != nil { + t.Errorf("#%d: %s", i, err) + continue + } + rbody := resp.Body + resp.Body = nil + diff(t, fmt.Sprintf("#%d Response", i), resp, &tt.Resp) + var bout bytes.Buffer + if rbody != nil { + io.Copy(&bout, rbody) + rbody.Close() + } + body := bout.String() + if body != tt.Body { + t.Errorf("#%d: Body = %q want %q", i, body, tt.Body) + } + } +} + +func diff(t *testing.T, prefix string, have, want interface{}) { + hv := reflect.NewValue(have).(*reflect.PtrValue).Elem().(*reflect.StructValue) + wv := reflect.NewValue(want).(*reflect.PtrValue).Elem().(*reflect.StructValue) + if hv.Type() != wv.Type() { + t.Errorf("%s: type mismatch %v vs %v", prefix, hv.Type(), wv.Type()) + } + for i := 0; i < hv.NumField(); i++ { + hf := hv.Field(i).Interface() + wf := wv.Field(i).Interface() + if !reflect.DeepEqual(hf, wf) { + t.Errorf("%s: %s = %v want %v", prefix, hv.Type().(*reflect.StructType).Field(i).Name, hf, wf) + } + } +} diff --git a/libgo/go/http/responsewrite_test.go b/libgo/go/http/responsewrite_test.go new file mode 100644 index 000000000..9f10be562 --- /dev/null +++ b/libgo/go/http/responsewrite_test.go @@ -0,0 +1,85 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
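+
+// The tests below drive Response.Write and compare the raw wire bytes.
+// For instance, the first case (ContentLength 6, identity coding) is
+// expected to produce:
+//
+// "HTTP/1.0 503 Service Unavailable\r\nContent-Length: 6\r\n\r\nabcdef"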
+ +package http + +import ( + "bytes" + "testing" +) + +type respWriteTest struct { + Resp Response + Raw string +} + +var respWriteTests = []respWriteTest{ + // HTTP/1.0, identity coding; no trailer + { + Response{ + StatusCode: 503, + ProtoMajor: 1, + ProtoMinor: 0, + RequestMethod: "GET", + Header: map[string]string{}, + Body: nopCloser{bytes.NewBufferString("abcdef")}, + ContentLength: 6, + }, + + "HTTP/1.0 503 Service Unavailable\r\n" + + "Content-Length: 6\r\n\r\n" + + "abcdef", + }, + // Unchunked response without Content-Length. + { + Response{ + StatusCode: 200, + ProtoMajor: 1, + ProtoMinor: 0, + RequestMethod: "GET", + Header: map[string]string{}, + Body: nopCloser{bytes.NewBufferString("abcdef")}, + ContentLength: -1, + }, + "HTTP/1.0 200 OK\r\n" + + "\r\n" + + "abcdef", + }, + // HTTP/1.1, chunked coding; empty trailer; close + { + Response{ + StatusCode: 200, + ProtoMajor: 1, + ProtoMinor: 1, + RequestMethod: "GET", + Header: map[string]string{}, + Body: nopCloser{bytes.NewBufferString("abcdef")}, + ContentLength: 6, + TransferEncoding: []string{"chunked"}, + Close: true, + }, + + "HTTP/1.1 200 OK\r\n" + + "Connection: close\r\n" + + "Transfer-Encoding: chunked\r\n\r\n" + + "6\r\nabcdef\r\n0\r\n\r\n", + }, +} + +func TestResponseWrite(t *testing.T) { + for i := range respWriteTests { + tt := &respWriteTests[i] + var braw bytes.Buffer + err := tt.Resp.Write(&braw) + if err != nil { + t.Errorf("error writing #%d: %s", i, err) + continue + } + sraw := braw.String() + if sraw != tt.Raw { + t.Errorf("Test %d, expecting:\n%s\nGot:\n%s\n", i, tt.Raw, sraw) + continue + } + } +} diff --git a/libgo/go/http/serve_test.go b/libgo/go/http/serve_test.go new file mode 100644 index 000000000..053d6dca4 --- /dev/null +++ b/libgo/go/http/serve_test.go @@ -0,0 +1,220 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+
+// End-to-end serving tests
+
+package http
+
+import (
+ "bufio"
+ "bytes"
+ "io"
+ "os"
+ "net"
+ "testing"
+)
+
+type dummyAddr string
+type oneConnListener struct {
+ conn net.Conn
+}
+
+func (l *oneConnListener) Accept() (c net.Conn, err os.Error) {
+ c = l.conn
+ if c == nil {
+ err = os.EOF
+ return
+ }
+ err = nil
+ l.conn = nil
+ return
+}
+
+func (l *oneConnListener) Close() os.Error {
+ return nil
+}
+
+func (l *oneConnListener) Addr() net.Addr {
+ return dummyAddr("test-address")
+}
+
+func (a dummyAddr) Network() string {
+ return string(a)
+}
+
+func (a dummyAddr) String() string {
+ return string(a)
+}
+
+type testConn struct {
+ readBuf bytes.Buffer
+ writeBuf bytes.Buffer
+}
+
+func (c *testConn) Read(b []byte) (int, os.Error) {
+ return c.readBuf.Read(b)
+}
+
+func (c *testConn) Write(b []byte) (int, os.Error) {
+ return c.writeBuf.Write(b)
+}
+
+func (c *testConn) Close() os.Error {
+ return nil
+}
+
+func (c *testConn) LocalAddr() net.Addr {
+ return dummyAddr("local-addr")
+}
+
+func (c *testConn) RemoteAddr() net.Addr {
+ return dummyAddr("remote-addr")
+}
+
+func (c *testConn) SetTimeout(nsec int64) os.Error {
+ return nil
+}
+
+func (c *testConn) SetReadTimeout(nsec int64) os.Error {
+ return nil
+}
+
+func (c *testConn) SetWriteTimeout(nsec int64) os.Error {
+ return nil
+}
+
+func TestConsumingBodyOnNextConn(t *testing.T) {
+ conn := new(testConn)
+ for i := 0; i < 2; i++ {
+ conn.readBuf.Write([]byte(
+ "POST / HTTP/1.1\r\n" +
+ "Host: test\r\n" +
+ "Content-Length: 11\r\n" +
+ "\r\n" +
+ "foo=1&bar=1"))
+ }
+
+ reqNum := 0
+ ch := make(chan *Request)
+ servech := make(chan os.Error)
+ listener := &oneConnListener{conn}
+ handler := func(res ResponseWriter, req *Request) {
+ reqNum++
+ t.Logf("Got request #%d: %v", reqNum, req)
+ ch <- req
+ }
+
+ go func() {
+ servech <- Serve(listener, HandlerFunc(handler))
+ }()
+
+ var req *Request
+ t.Log("Waiting for first request.")
+ req = <-ch
+ if req == nil {
+ t.Fatal("Got nil first request.")
+ }
+ if req.Method != "POST" {
+ t.Errorf("For request #1's method, got %q; expected %q",
+ req.Method, "POST")
+ }
+
+ t.Log("Waiting for second request.")
+ req = <-ch
+ if req == nil {
+ t.Fatal("Got nil second request.")
+ }
+ if req.Method != "POST" {
+ t.Errorf("For request #2's method, got %q; expected %q",
+ req.Method, "POST")
+ }
+
+ t.Log("Waiting for EOF.")
+ if serveerr := <-servech; serveerr != os.EOF {
+ t.Errorf("Serve returned %q; expected EOF", serveerr)
+ }
+}
+
+type responseWriterMethodCall struct {
+ method string
+ headerKey, headerValue string // if method == "SetHeader"
+ bytesWritten []byte // if method == "Write"
+ responseCode int // if method == "WriteHeader"
+}
+
+type recordingResponseWriter struct {
+ log []*responseWriterMethodCall
+}
+
+func (rw *recordingResponseWriter) RemoteAddr() string {
+ return "1.2.3.4"
+}
+
+func (rw *recordingResponseWriter) UsingTLS() bool {
+ return false
+}
+
+func (rw *recordingResponseWriter) SetHeader(k, v string) {
+ rw.log = append(rw.log, &responseWriterMethodCall{method: "SetHeader", headerKey: k, headerValue: v})
+}
+
+func (rw *recordingResponseWriter) Write(buf []byte) (int, os.Error) {
+ rw.log = append(rw.log, &responseWriterMethodCall{method: "Write", bytesWritten: buf})
+ return len(buf), nil
+}
+
+func (rw *recordingResponseWriter) WriteHeader(code int) {
+ rw.log = append(rw.log, &responseWriterMethodCall{method: "WriteHeader", responseCode: code})
+}
+
+func (rw *recordingResponseWriter) Flush() {
+ rw.log = append(rw.log, 
&responseWriterMethodCall{method: "Flush"}) +} + +func (rw *recordingResponseWriter) Hijack() (io.ReadWriteCloser, *bufio.ReadWriter, os.Error) { + panic("Not supported") +} + +// Tests for http://code.google.com/p/go/issues/detail?id=900 +func TestMuxRedirectLeadingSlashes(t *testing.T) { + paths := []string{"//foo.txt", "///foo.txt", "/../../foo.txt"} + for _, path := range paths { + req, err := ReadRequest(bufio.NewReader(bytes.NewBufferString("GET " + path + " HTTP/1.1\r\nHost: test\r\n\r\n"))) + if err != nil { + t.Errorf("%s", err) + } + mux := NewServeMux() + resp := new(recordingResponseWriter) + resp.log = make([]*responseWriterMethodCall, 0) + + mux.ServeHTTP(resp, req) + + dumpLog := func() { + t.Logf("For path %q:", path) + for _, call := range resp.log { + t.Logf("Got call: %s, header=%s, value=%s, buf=%q, code=%d", call.method, + call.headerKey, call.headerValue, call.bytesWritten, call.responseCode) + } + } + + if len(resp.log) != 2 { + dumpLog() + t.Errorf("expected 2 calls to response writer; got %d", len(resp.log)) + return + } + + if resp.log[0].method != "SetHeader" || + resp.log[0].headerKey != "Location" || resp.log[0].headerValue != "/foo.txt" { + dumpLog() + t.Errorf("Expected SetHeader of Location to /foo.txt") + return + } + + if resp.log[1].method != "WriteHeader" || resp.log[1].responseCode != StatusMovedPermanently { + dumpLog() + t.Errorf("Expected WriteHeader of StatusMovedPermanently") + return + } + } +} diff --git a/libgo/go/http/server.go b/libgo/go/http/server.go new file mode 100644 index 000000000..644724f58 --- /dev/null +++ b/libgo/go/http/server.go @@ -0,0 +1,766 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// HTTP server. See RFC 2616. + +// TODO(rsc): +// logging +// cgi support +// post support + +package http + +import ( + "bufio" + "crypto/rand" + "crypto/tls" + "fmt" + "io" + "log" + "net" + "os" + "path" + "strconv" + "strings" + "time" +) + +// Errors introduced by the HTTP server. +var ( + ErrWriteAfterFlush = os.NewError("Conn.Write called after Flush") + ErrBodyNotAllowed = os.NewError("http: response status code does not allow body") + ErrHijacked = os.NewError("Conn has been hijacked") +) + +// Objects implementing the Handler interface can be +// registered to serve a particular path or subtree +// in the HTTP server. +// +// ServeHTTP should write reply headers and data to the ResponseWriter +// and then return. Returning signals that the request is finished +// and that the HTTP server can move on to the next request on +// the connection. +type Handler interface { + ServeHTTP(ResponseWriter, *Request) +} + +// A ResponseWriter interface is used by an HTTP handler to +// construct an HTTP response. +type ResponseWriter interface { + // RemoteAddr returns the address of the client that sent the current request + RemoteAddr() string + + // UsingTLS returns true if the client is connected using TLS + UsingTLS() bool + + // SetHeader sets a header line in the eventual response. + // For example, SetHeader("Content-Type", "text/html; charset=utf-8") + // will result in the header line + // + // Content-Type: text/html; charset=utf-8 + // + // being sent. UTF-8 encoded HTML is the default setting for + // Content-Type in this library, so users need not make that + // particular call. Calls to SetHeader after WriteHeader (or Write) + // are ignored. 
+ SetHeader(string, string)
+
+ // Write writes the data to the connection as part of an HTTP reply.
+ // If WriteHeader has not yet been called, Write calls WriteHeader(http.StatusOK)
+ // before writing the data.
+ Write([]byte) (int, os.Error)
+
+ // WriteHeader sends an HTTP response header with status code.
+ // If WriteHeader is not called explicitly, the first call to Write
+ // will trigger an implicit WriteHeader(http.StatusOK).
+ // Thus explicit calls to WriteHeader are mainly used to
+ // send error codes.
+ WriteHeader(int)
+
+ // Flush sends any buffered data to the client.
+ Flush()
+
+ // Hijack lets the caller take over the connection.
+ // After a call to Hijack(), the HTTP server library
+ // will not do anything else with the connection.
+ // It becomes the caller's responsibility to manage
+ // and close the connection.
+ Hijack() (io.ReadWriteCloser, *bufio.ReadWriter, os.Error)
+}
+
+// A conn represents the server side of an HTTP connection.
+type conn struct {
+ remoteAddr string // network address of remote side
+ handler Handler // request handler
+ rwc io.ReadWriteCloser // i/o connection
+ buf *bufio.ReadWriter // buffered rwc
+ hijacked bool // connection has been hijacked by handler
+ usingTLS bool // a flag indicating connection over TLS
+}
+
+// A response represents the server side of an HTTP response.
+type response struct {
+ conn *conn
+ req *Request // request for this response
+ chunking bool // using chunked transfer encoding for reply body
+ wroteHeader bool // reply header has been written
+ wroteContinue bool // 100 Continue response was written
+ header map[string]string // reply header parameters
+ written int64 // number of bytes written in body
+ status int // status code passed to WriteHeader
+
+ // close connection after this reply. set on request and
+ // updated after response from handler if there's a
+ // "Connection: keep-alive" response header and a
+ // Content-Length.
+ closeAfterReply bool
+}
+
+// Create new connection from rwc.
+func newConn(rwc net.Conn, handler Handler) (c *conn, err os.Error) {
+ c = new(conn)
+ c.remoteAddr = rwc.RemoteAddr().String()
+ c.handler = handler
+ c.rwc = rwc
+ _, c.usingTLS = rwc.(*tls.Conn)
+ br := bufio.NewReader(rwc)
+ bw := bufio.NewWriter(rwc)
+ c.buf = bufio.NewReadWriter(br, bw)
+ return c, nil
+}
+
+// wrapper around io.ReadCloser which, on first read, sends an
+// HTTP/1.1 100 Continue header
+type expectContinueReader struct {
+ resp *response
+ readCloser io.ReadCloser
+}
+
+func (ecr *expectContinueReader) Read(p []byte) (n int, err os.Error) {
+ if !ecr.resp.wroteContinue && !ecr.resp.conn.hijacked {
+ ecr.resp.wroteContinue = true
+ io.WriteString(ecr.resp.conn.buf, "HTTP/1.1 100 Continue\r\n\r\n")
+ ecr.resp.conn.buf.Flush()
+ }
+ return ecr.readCloser.Read(p)
+}
+
+func (ecr *expectContinueReader) Close() os.Error {
+ return ecr.readCloser.Close()
+}
+
+// TimeFormat is the time format to use with
+// time.Parse and time.Time.Format when parsing
+// or generating times in HTTP headers.
+// It is like time.RFC1123 but hard codes GMT as the time zone.
+const TimeFormat = "Mon, 02 Jan 2006 15:04:05 GMT"
+
+// Read next request from connection.
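+// If the request announces "Expect: 100-continue", the Body created
+// here gets wrapped in expectContinueReader (above), so the exchange
+// looks roughly like this sketch (not a captured trace):
+//
+// C: POST /upload HTTP/1.1
+// C: Expect: 100-continue
+// C:
+// S: HTTP/1.1 100 Continue
+// C: <body bytes>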
+func (c *conn) readRequest() (w *response, err os.Error) { + if c.hijacked { + return nil, ErrHijacked + } + var req *Request + if req, err = ReadRequest(c.buf.Reader); err != nil { + return nil, err + } + + w = new(response) + w.conn = c + w.req = req + w.header = make(map[string]string) + + // Expect 100 Continue support + if req.expectsContinue() && req.ProtoAtLeast(1, 1) { + // Wrap the Body reader with one that replies on the connection + req.Body = &expectContinueReader{readCloser: req.Body, resp: w} + } + + // Default output is HTML encoded in UTF-8. + w.SetHeader("Content-Type", "text/html; charset=utf-8") + w.SetHeader("Date", time.UTC().Format(TimeFormat)) + + if req.Method == "HEAD" { + // do nothing + } else if req.ProtoAtLeast(1, 1) { + // HTTP/1.1 or greater: use chunked transfer encoding + // to avoid closing the connection at EOF. + w.chunking = true + w.SetHeader("Transfer-Encoding", "chunked") + } else { + // HTTP version < 1.1: cannot do chunked transfer + // encoding, so signal EOF by closing connection. + // Will be overridden if the HTTP handler ends up + // writing a Content-Length and the client requested + // "Connection: keep-alive" + w.closeAfterReply = true + } + + return w, nil +} + +// UsingTLS implements the ResponseWriter.UsingTLS +func (w *response) UsingTLS() bool { + return w.conn.usingTLS +} + +// RemoteAddr implements the ResponseWriter.RemoteAddr method +func (w *response) RemoteAddr() string { return w.conn.remoteAddr } + +// SetHeader implements the ResponseWriter.SetHeader method +func (w *response) SetHeader(hdr, val string) { w.header[CanonicalHeaderKey(hdr)] = val } + +// WriteHeader implements the ResponseWriter.WriteHeader method +func (w *response) WriteHeader(code int) { + if w.conn.hijacked { + log.Print("http: response.WriteHeader on hijacked connection") + return + } + if w.wroteHeader { + log.Print("http: multiple response.WriteHeader calls") + return + } + w.wroteHeader = true + w.status = code + if code == StatusNotModified { + // Must not have body. + w.header["Content-Type"] = "", false + w.header["Transfer-Encoding"] = "", false + w.chunking = false + } + // Cannot use Content-Length with non-identity Transfer-Encoding. + if w.chunking { + w.header["Content-Length"] = "", false + } + if !w.req.ProtoAtLeast(1, 0) { + return + } + proto := "HTTP/1.0" + if w.req.ProtoAtLeast(1, 1) { + proto = "HTTP/1.1" + } + codestring := strconv.Itoa(code) + text, ok := statusText[code] + if !ok { + text = "status code " + codestring + } + io.WriteString(w.conn.buf, proto+" "+codestring+" "+text+"\r\n") + for k, v := range w.header { + io.WriteString(w.conn.buf, k+": "+v+"\r\n") + } + io.WriteString(w.conn.buf, "\r\n") +} + +// Write implements the ResponseWriter.Write method +func (w *response) Write(data []byte) (n int, err os.Error) { + if w.conn.hijacked { + log.Print("http: response.Write on hijacked connection") + return 0, ErrHijacked + } + if !w.wroteHeader { + if w.req.wantsHttp10KeepAlive() { + _, hasLength := w.header["Content-Length"] + if hasLength { + _, connectionHeaderSet := w.header["Connection"] + if !connectionHeaderSet { + w.header["Connection"] = "keep-alive" + } + } + } + w.WriteHeader(StatusOK) + } + if len(data) == 0 { + return 0, nil + } + + if w.status == StatusNotModified || w.req.Method == "HEAD" { + // Must not have body. 
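+ // (A 304 reply and any reply to a HEAD request never carry a
+ // body, so an explicit Write is refused with ErrBodyNotAllowed.)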
+ return 0, ErrBodyNotAllowed
+ }
+
+ w.written += int64(len(data)) // ignoring errors, for errorKludge
+
+ // TODO(rsc): if chunking happened after the buffering,
+ // then there would be fewer chunk headers.
+ // On the other hand, it would make hijacking more difficult.
+ if w.chunking {
+ fmt.Fprintf(w.conn.buf, "%x\r\n", len(data)) // TODO(rsc): use strconv not fmt
+ }
+ n, err = w.conn.buf.Write(data)
+ if err == nil && w.chunking {
+ if n != len(data) {
+ err = io.ErrShortWrite
+ }
+ if err == nil {
+ io.WriteString(w.conn.buf, "\r\n")
+ }
+ }
+
+ return n, err
+}
+
+// If this is an error reply (4xx or 5xx)
+// and the handler wrote some data explaining the error,
+// some browsers (e.g., Chrome, Internet Explorer)
+// will show their own error instead unless the error is
+// long enough. The minimum lengths used in those
+// browsers are in the 256-512 range.
+// Pad to 1024 bytes.
+func errorKludge(w *response) {
+ const min = 1024
+
+ // Is this an error?
+ if kind := w.status / 100; kind != 4 && kind != 5 {
+ return
+ }
+
+ // Did the handler supply any info? Enough?
+ if w.written == 0 || w.written >= min {
+ return
+ }
+
+ // Is it a broken browser?
+ var msg string
+ switch agent := w.req.UserAgent; {
+ case strings.Contains(agent, "MSIE"):
+ msg = "Internet Explorer"
+ case strings.Contains(agent, "Chrome/"):
+ msg = "Chrome"
+ default:
+ return
+ }
+ msg += " would ignore this error page if this text weren't here.\n"
+
+ // Is it text? ("Content-Type" is always in the map)
+ baseType := strings.Split(w.header["Content-Type"], ";", 2)[0]
+ switch baseType {
+ case "text/html":
+ io.WriteString(w, "<!-- ")
+ for w.written < min {
+ io.WriteString(w, msg)
+ }
+ io.WriteString(w, " -->")
+ case "text/plain":
+ io.WriteString(w, "\n")
+ for w.written < min {
+ io.WriteString(w, msg)
+ }
+ }
+}
+
+func (w *response) finishRequest() {
+ // If this was an HTTP/1.0 request with keep-alive and we sent a Content-Length
+ // back, we can make this a keep-alive response ...
+ if w.req.wantsHttp10KeepAlive() {
+ _, sentLength := w.header["Content-Length"]
+ if sentLength && w.header["Connection"] == "keep-alive" {
+ w.closeAfterReply = false
+ }
+ }
+ if !w.wroteHeader {
+ w.WriteHeader(StatusOK)
+ }
+ errorKludge(w)
+ if w.chunking {
+ io.WriteString(w.conn.buf, "0\r\n")
+ // trailer key/value pairs, followed by blank line
+ io.WriteString(w.conn.buf, "\r\n")
+ }
+ w.conn.buf.Flush()
+ w.req.Body.Close()
+}
+
+// Flush implements the ResponseWriter.Flush method.
+func (w *response) Flush() {
+ if !w.wroteHeader {
+ w.WriteHeader(StatusOK)
+ }
+ w.conn.buf.Flush()
+}
+
+// Close the connection.
+func (c *conn) close() {
+ if c.buf != nil {
+ c.buf.Flush()
+ c.buf = nil
+ }
+ if c.rwc != nil {
+ c.rwc.Close()
+ c.rwc = nil
+ }
+}
+
+// Serve a new connection.
+func (c *conn) serve() {
+ for {
+ w, err := c.readRequest()
+ if err != nil {
+ break
+ }
+ // HTTP cannot have multiple simultaneous active requests.[*]
+ // Until the server replies to this request, it can't read another,
+ // so we might as well run the handler in this goroutine.
+ // [*] Not strictly true: HTTP pipelining. We could let them all process
+ // in parallel even if their responses need to be serialized.
+ c.handler.ServeHTTP(w, w.req)
+ if c.hijacked {
+ return
+ }
+ w.finishRequest()
+ if w.closeAfterReply {
+ break
+ }
+ }
+ c.close()
+}
+
+// Hijack implements the ResponseWriter.Hijack method.
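+// A rough usage sketch (hypothetical handler code, not from this file):
+//
+// rwc, buf, err := w.Hijack()
+// if err == nil {
+// defer rwc.Close()
+// buf.WriteString("switching away from HTTP\r\n")
+// buf.Flush()
+// }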
+func (w *response) Hijack() (rwc io.ReadWriteCloser, buf *bufio.ReadWriter, err os.Error) { + if w.conn.hijacked { + return nil, nil, ErrHijacked + } + w.conn.hijacked = true + rwc = w.conn.rwc + buf = w.conn.buf + w.conn.rwc = nil + w.conn.buf = nil + return +} + +// The HandlerFunc type is an adapter to allow the use of +// ordinary functions as HTTP handlers. If f is a function +// with the appropriate signature, HandlerFunc(f) is a +// Handler object that calls f. +type HandlerFunc func(ResponseWriter, *Request) + +// ServeHTTP calls f(w, req). +func (f HandlerFunc) ServeHTTP(w ResponseWriter, r *Request) { + f(w, r) +} + +// Helper handlers + +// Error replies to the request with the specified error message and HTTP code. +func Error(w ResponseWriter, error string, code int) { + w.SetHeader("Content-Type", "text/plain; charset=utf-8") + w.WriteHeader(code) + fmt.Fprintln(w, error) +} + +// NotFound replies to the request with an HTTP 404 not found error. +func NotFound(w ResponseWriter, r *Request) { Error(w, "404 page not found", StatusNotFound) } + +// NotFoundHandler returns a simple request handler +// that replies to each request with a ``404 page not found'' reply. +func NotFoundHandler() Handler { return HandlerFunc(NotFound) } + +// Redirect replies to the request with a redirect to url, +// which may be a path relative to the request path. +func Redirect(w ResponseWriter, r *Request, url string, code int) { + if u, err := ParseURL(url); err == nil { + // If url was relative, make absolute by + // combining with request path. + // The browser would probably do this for us, + // but doing it ourselves is more reliable. + + // NOTE(rsc): RFC 2616 says that the Location + // line must be an absolute URI, like + // "http://www.google.com/redirect/", + // not a path like "/redirect/". + // Unfortunately, we don't know what to + // put in the host name section to get the + // client to connect to us again, so we can't + // know the right absolute URI to send back. + // Because of this problem, no one pays attention + // to the RFC; they all send back just a new path. + // So do we. + oldpath := r.URL.Path + if oldpath == "" { // should not happen, but avoid a crash if it does + oldpath = "/" + } + if u.Scheme == "" { + // no leading http://server + if url == "" || url[0] != '/' { + // make relative path absolute + olddir, _ := path.Split(oldpath) + url = olddir + url + } + + // clean up but preserve trailing slash + trailing := url[len(url)-1] == '/' + url = path.Clean(url) + if trailing && url[len(url)-1] != '/' { + url += "/" + } + } + } + + w.SetHeader("Location", url) + w.WriteHeader(code) + + // RFC2616 recommends that a short note "SHOULD" be included in the + // response because older user agents may not understand 301/307. + // Shouldn't send the response for POST or HEAD; that leaves GET. 
+ if r.Method == "GET" { + note := "<a href=\"" + htmlEscape(url) + "\">" + statusText[code] + "</a>.\n" + fmt.Fprintln(w, note) + } +} + +func htmlEscape(s string) string { + s = strings.Replace(s, "&", "&", -1) + s = strings.Replace(s, "<", "<", -1) + s = strings.Replace(s, ">", ">", -1) + s = strings.Replace(s, "\"", """, -1) + s = strings.Replace(s, "'", "'", -1) + return s +} + +// Redirect to a fixed URL +type redirectHandler struct { + url string + code int +} + +func (rh *redirectHandler) ServeHTTP(w ResponseWriter, r *Request) { + Redirect(w, r, rh.url, rh.code) +} + +// RedirectHandler returns a request handler that redirects +// each request it receives to the given url using the given +// status code. +func RedirectHandler(url string, code int) Handler { + return &redirectHandler{url, code} +} + +// ServeMux is an HTTP request multiplexer. +// It matches the URL of each incoming request against a list of registered +// patterns and calls the handler for the pattern that +// most closely matches the URL. +// +// Patterns named fixed paths, like "/favicon.ico", +// or subtrees, like "/images/" (note the trailing slash). +// Patterns must begin with /. +// Longer patterns take precedence over shorter ones, so that +// if there are handlers registered for both "/images/" +// and "/images/thumbnails/", the latter handler will be +// called for paths beginning "/images/thumbnails/" and the +// former will receiver requests for any other paths in the +// "/images/" subtree. +// +// In the future, the pattern syntax may be relaxed to allow +// an optional host-name at the beginning of the pattern, +// so that a handler might register for the two patterns +// "/codesearch" and "codesearch.google.com/" +// without taking over requests for http://www.google.com/. +// +// ServeMux also takes care of sanitizing the URL request path, +// redirecting any request containing . or .. elements to an +// equivalent .- and ..-free URL. +type ServeMux struct { + m map[string]Handler +} + +// NewServeMux allocates and returns a new ServeMux. +func NewServeMux() *ServeMux { return &ServeMux{make(map[string]Handler)} } + +// DefaultServeMux is the default ServeMux used by Serve. +var DefaultServeMux = NewServeMux() + +// Does path match pattern? +func pathMatch(pattern, path string) bool { + if len(pattern) == 0 { + // should not happen + return false + } + n := len(pattern) + if pattern[n-1] != '/' { + return pattern == path + } + return len(path) >= n && path[0:n] == pattern +} + +// Return the canonical path for p, eliminating . and .. elements. +func cleanPath(p string) string { + if p == "" { + return "/" + } + if p[0] != '/' { + p = "/" + p + } + np := path.Clean(p) + // path.Clean removes trailing slash except for root; + // put the trailing slash back if necessary. + if p[len(p)-1] == '/' && np != "/" { + np += "/" + } + return np +} + +// ServeHTTP dispatches the request to the handler whose +// pattern most closely matches the request URL. +func (mux *ServeMux) ServeHTTP(w ResponseWriter, r *Request) { + // Clean path to canonical form and redirect. + if p := cleanPath(r.URL.Path); p != r.URL.Path { + w.SetHeader("Location", p) + w.WriteHeader(StatusMovedPermanently) + return + } + + // Most-specific (longest) pattern wins. 
+ var h Handler + var n = 0 + for k, v := range mux.m { + if !pathMatch(k, r.URL.Path) { + continue + } + if h == nil || len(k) > n { + n = len(k) + h = v + } + } + if h == nil { + h = NotFoundHandler() + } + h.ServeHTTP(w, r) +} + +// Handle registers the handler for the given pattern. +func (mux *ServeMux) Handle(pattern string, handler Handler) { + if pattern == "" || pattern[0] != '/' { + panic("http: invalid pattern " + pattern) + } + + mux.m[pattern] = handler + + // Helpful behavior: + // If pattern is /tree/, insert permanent redirect for /tree. + n := len(pattern) + if n > 0 && pattern[n-1] == '/' { + mux.m[pattern[0:n-1]] = RedirectHandler(pattern, StatusMovedPermanently) + } +} + +// HandleFunc registers the handler function for the given pattern. +func (mux *ServeMux) HandleFunc(pattern string, handler func(ResponseWriter, *Request)) { + mux.Handle(pattern, HandlerFunc(handler)) +} + +// Handle registers the handler for the given pattern +// in the DefaultServeMux. +func Handle(pattern string, handler Handler) { DefaultServeMux.Handle(pattern, handler) } + +// HandleFunc registers the handler function for the given pattern +// in the DefaultServeMux. +func HandleFunc(pattern string, handler func(ResponseWriter, *Request)) { + DefaultServeMux.HandleFunc(pattern, handler) +} + +// Serve accepts incoming HTTP connections on the listener l, +// creating a new service thread for each. The service threads +// read requests and then call handler to reply to them. +// Handler is typically nil, in which case the DefaultServeMux is used. +func Serve(l net.Listener, handler Handler) os.Error { + if handler == nil { + handler = DefaultServeMux + } + for { + rw, e := l.Accept() + if e != nil { + return e + } + c, err := newConn(rw, handler) + if err != nil { + continue + } + go c.serve() + } + panic("not reached") +} + +// ListenAndServe listens on the TCP network address addr +// and then calls Serve with handler to handle requests +// on incoming connections. Handler is typically nil, +// in which case the DefaultServeMux is used. +// +// A trivial example server is: +// +// package main +// +// import ( +// "http" +// "io" +// "log" +// ) +// +// // hello world, the web server +// func HelloServer(w http.ResponseWriter, req *http.Request) { +// io.WriteString(w, "hello, world!\n") +// } +// +// func main() { +// http.HandleFunc("/hello", HelloServer) +// err := http.ListenAndServe(":12345", nil) +// if err != nil { +// log.Exit("ListenAndServe: ", err.String()) +// } +// } +func ListenAndServe(addr string, handler Handler) os.Error { + l, e := net.Listen("tcp", addr) + if e != nil { + return e + } + e = Serve(l, handler) + l.Close() + return e +} + +// ListenAndServeTLS acts identically to ListenAndServe, except that it +// expects HTTPS connections. Additionally, files containing a certificate and +// matching private key for the server must be provided. +// +// A trivial example server is: +// +// import ( +// "http" +// "log" +// ) +// +// func handler(w http.ResponseWriter, req *http.Request) { +// w.SetHeader("Content-Type", "text/plain") +// w.Write([]byte("This is an example server.\n")) +// } +// +// func main() { +// http.HandleFunc("/", handler) +// log.Printf("About to listen on 10443. Go to https://127.0.0.1:10443/") +// err := http.ListenAndServeTLS(":10443", "cert.pem", "key.pem", nil) +// if err != nil { +// log.Exit(err) +// } +// } +// +// One can use generate_cert.go in crypto/tls to generate cert.pem and key.pem. 
+func ListenAndServeTLS(addr string, certFile string, keyFile string, handler Handler) os.Error { + config := &tls.Config{ + Rand: rand.Reader, + Time: time.Seconds, + NextProtos: []string{"http/1.1"}, + } + + var err os.Error + config.Certificates = make([]tls.Certificate, 1) + config.Certificates[0], err = tls.LoadX509KeyPair(certFile, keyFile) + if err != nil { + return err + } + + conn, err := net.Listen("tcp", addr) + if err != nil { + return err + } + + tlsListener := tls.NewListener(conn, config) + return Serve(tlsListener, handler) +} diff --git a/libgo/go/http/status.go b/libgo/go/http/status.go new file mode 100644 index 000000000..b6e2d65c6 --- /dev/null +++ b/libgo/go/http/status.go @@ -0,0 +1,106 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package http + +// HTTP status codes, defined in RFC 2616. +const ( + StatusContinue = 100 + StatusSwitchingProtocols = 101 + + StatusOK = 200 + StatusCreated = 201 + StatusAccepted = 202 + StatusNonAuthoritativeInfo = 203 + StatusNoContent = 204 + StatusResetContent = 205 + StatusPartialContent = 206 + + StatusMultipleChoices = 300 + StatusMovedPermanently = 301 + StatusFound = 302 + StatusSeeOther = 303 + StatusNotModified = 304 + StatusUseProxy = 305 + StatusTemporaryRedirect = 307 + + StatusBadRequest = 400 + StatusUnauthorized = 401 + StatusPaymentRequired = 402 + StatusForbidden = 403 + StatusNotFound = 404 + StatusMethodNotAllowed = 405 + StatusNotAcceptable = 406 + StatusProxyAuthRequired = 407 + StatusRequestTimeout = 408 + StatusConflict = 409 + StatusGone = 410 + StatusLengthRequired = 411 + StatusPreconditionFailed = 412 + StatusRequestEntityTooLarge = 413 + StatusRequestURITooLong = 414 + StatusUnsupportedMediaType = 415 + StatusRequestedRangeNotSatisfiable = 416 + StatusExpectationFailed = 417 + + StatusInternalServerError = 500 + StatusNotImplemented = 501 + StatusBadGateway = 502 + StatusServiceUnavailable = 503 + StatusGatewayTimeout = 504 + StatusHTTPVersionNotSupported = 505 +) + +var statusText = map[int]string{ + StatusContinue: "Continue", + StatusSwitchingProtocols: "Switching Protocols", + + StatusOK: "OK", + StatusCreated: "Created", + StatusAccepted: "Accepted", + StatusNonAuthoritativeInfo: "Non-Authoritative Information", + StatusNoContent: "No Content", + StatusResetContent: "Reset Content", + StatusPartialContent: "Partial Content", + + StatusMultipleChoices: "Multiple Choices", + StatusMovedPermanently: "Moved Permanently", + StatusFound: "Found", + StatusSeeOther: "See Other", + StatusNotModified: "Not Modified", + StatusUseProxy: "Use Proxy", + StatusTemporaryRedirect: "Temporary Redirect", + + StatusBadRequest: "Bad Request", + StatusUnauthorized: "Unauthorized", + StatusPaymentRequired: "Payment Required", + StatusForbidden: "Forbidden", + StatusNotFound: "Not Found", + StatusMethodNotAllowed: "Method Not Allowed", + StatusNotAcceptable: "Not Acceptable", + StatusProxyAuthRequired: "Proxy Authentication Required", + StatusRequestTimeout: "Request Timeout", + StatusConflict: "Conflict", + StatusGone: "Gone", + StatusLengthRequired: "Length Required", + StatusPreconditionFailed: "Precondition Failed", + StatusRequestEntityTooLarge: "Request Entity Too Large", + StatusRequestURITooLong: "Request URI Too Long", + StatusUnsupportedMediaType: "Unsupported Media Type", + StatusRequestedRangeNotSatisfiable: "Requested Range Not Satisfiable", + StatusExpectationFailed: "Expectation Failed", + + 
StatusInternalServerError: "Internal Server Error", + StatusNotImplemented: "Not Implemented", + StatusBadGateway: "Bad Gateway", + StatusServiceUnavailable: "Service Unavailable", + StatusGatewayTimeout: "Gateway Timeout", + StatusHTTPVersionNotSupported: "HTTP Version Not Supported", +} + +// StatusText returns a text for the HTTP status code. It returns the empty +// string if the code is unknown. +func StatusText(code int) string { + return statusText[code] +} diff --git a/libgo/go/http/testdata/file b/libgo/go/http/testdata/file new file mode 100644 index 000000000..11f11f9be --- /dev/null +++ b/libgo/go/http/testdata/file @@ -0,0 +1 @@ +0123456789 diff --git a/libgo/go/http/transfer.go b/libgo/go/http/transfer.go new file mode 100644 index 000000000..e62885d62 --- /dev/null +++ b/libgo/go/http/transfer.go @@ -0,0 +1,441 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package http + +import ( + "bufio" + "io" + "os" + "strconv" + "strings" +) + +// transferWriter inspects the fields of a user-supplied Request or Response, +// sanitizes them without changing the user object and provides methods for +// writing the respective header, body and trailer in wire format. +type transferWriter struct { + Body io.ReadCloser + ResponseToHEAD bool + ContentLength int64 + Close bool + TransferEncoding []string + Trailer map[string]string +} + +func newTransferWriter(r interface{}) (t *transferWriter, err os.Error) { + t = &transferWriter{} + + // Extract relevant fields + atLeastHTTP11 := false + switch rr := r.(type) { + case *Request: + t.Body = rr.Body + t.ContentLength = rr.ContentLength + t.Close = rr.Close + t.TransferEncoding = rr.TransferEncoding + t.Trailer = rr.Trailer + atLeastHTTP11 = rr.ProtoAtLeast(1, 1) + case *Response: + t.Body = rr.Body + t.ContentLength = rr.ContentLength + t.Close = rr.Close + t.TransferEncoding = rr.TransferEncoding + t.Trailer = rr.Trailer + atLeastHTTP11 = rr.ProtoAtLeast(1, 1) + t.ResponseToHEAD = noBodyExpected(rr.RequestMethod) + } + + // Sanitize Body,ContentLength,TransferEncoding + if t.ResponseToHEAD { + t.Body = nil + t.TransferEncoding = nil + // ContentLength is expected to hold Content-Length + if t.ContentLength < 0 { + return nil, ErrMissingContentLength + } + } else { + if !atLeastHTTP11 || t.Body == nil { + t.TransferEncoding = nil + } + if chunked(t.TransferEncoding) { + t.ContentLength = -1 + } else if t.Body == nil { // no chunking, no body + t.ContentLength = 0 + } + } + + // Sanitize Trailer + if !chunked(t.TransferEncoding) { + t.Trailer = nil + } + + return t, nil +} + +func noBodyExpected(requestMethod string) bool { + return requestMethod == "HEAD" +} + +func (t *transferWriter) WriteHeader(w io.Writer) (err os.Error) { + if t.Close { + _, err = io.WriteString(w, "Connection: close\r\n") + if err != nil { + return + } + } + + // Write Content-Length and/or Transfer-Encoding whose values are a + // function of the sanitized field triple (Body, ContentLength, + // TransferEncoding) + if chunked(t.TransferEncoding) { + _, err = io.WriteString(w, "Transfer-Encoding: chunked\r\n") + if err != nil { + return + } + } else if t.ContentLength > 0 || t.ResponseToHEAD { + io.WriteString(w, "Content-Length: ") + _, err = io.WriteString(w, strconv.Itoa64(t.ContentLength)+"\r\n") + if err != nil { + return + } + } + + // Write Trailer header + if t.Trailer != nil { + // TODO: At some point, there should be a generic mechanism for 
+ // writing long headers, using HTTP line splitting + io.WriteString(w, "Trailer: ") + needComma := false + for k := range t.Trailer { + k = CanonicalHeaderKey(k) + switch k { + case "Transfer-Encoding", "Trailer", "Content-Length": + return &badStringError{"invalid Trailer key", k} + } + if needComma { + io.WriteString(w, ",") + } + io.WriteString(w, k) + needComma = true + } + _, err = io.WriteString(w, "\r\n") + } + + return +} + +func (t *transferWriter) WriteBody(w io.Writer) (err os.Error) { + // Write body + if t.Body != nil { + if chunked(t.TransferEncoding) { + cw := NewChunkedWriter(w) + _, err = io.Copy(cw, t.Body) + if err == nil { + err = cw.Close() + } + } else if t.ContentLength == -1 { + _, err = io.Copy(w, t.Body) + } else { + _, err = io.Copy(w, io.LimitReader(t.Body, t.ContentLength)) + } + if err != nil { + return err + } + if err = t.Body.Close(); err != nil { + return err + } + } + + // TODO(petar): Place trailer writer code here. + if chunked(t.TransferEncoding) { + // Last chunk, empty trailer + _, err = io.WriteString(w, "\r\n") + } + + return +} + +type transferReader struct { + // Input + Header map[string]string + StatusCode int + RequestMethod string + ProtoMajor int + ProtoMinor int + // Output + Body io.ReadCloser + ContentLength int64 + TransferEncoding []string + Close bool + Trailer map[string]string +} + +// msg is *Request or *Response. +func readTransfer(msg interface{}, r *bufio.Reader) (err os.Error) { + t := &transferReader{} + + // Unify input + switch rr := msg.(type) { + case *Response: + t.Header = rr.Header + t.StatusCode = rr.StatusCode + t.RequestMethod = rr.RequestMethod + t.ProtoMajor = rr.ProtoMajor + t.ProtoMinor = rr.ProtoMinor + t.Close = shouldClose(t.ProtoMajor, t.ProtoMinor, t.Header) + case *Request: + t.Header = rr.Header + t.ProtoMajor = rr.ProtoMajor + t.ProtoMinor = rr.ProtoMinor + // Transfer semantics for Requests are exactly like those for + // Responses with status code 200, responding to a GET method + t.StatusCode = 200 + t.RequestMethod = "GET" + } + + // Default to HTTP/1.1 + if t.ProtoMajor == 0 && t.ProtoMinor == 0 { + t.ProtoMajor, t.ProtoMinor = 1, 1 + } + + // Transfer encoding, content length + t.TransferEncoding, err = fixTransferEncoding(t.Header) + if err != nil { + return err + } + + t.ContentLength, err = fixLength(t.StatusCode, t.RequestMethod, t.Header, t.TransferEncoding) + if err != nil { + return err + } + + // Trailer + t.Trailer, err = fixTrailer(t.Header, t.TransferEncoding) + if err != nil { + return err + } + + // Prepare body reader. ContentLength < 0 means chunked encoding + // or close connection when finished, since multipart is not supported yet + switch { + case chunked(t.TransferEncoding): + t.Body = &body{Reader: newChunkedReader(r), hdr: msg, r: r, closing: t.Close} + case t.ContentLength >= 0: + // TODO: limit the Content-Length. This is an easy DoS vector. + t.Body = &body{Reader: io.LimitReader(r, t.ContentLength), closing: t.Close} + default: + // t.ContentLength < 0, i.e. "Content-Length" not mentioned in header + if t.Close { + // Close semantics (i.e. HTTP/1.0) + t.Body = &body{Reader: r, closing: t.Close} + } else { + // Persistent connection (i.e. HTTP/1.1) + t.Body = &body{Reader: io.LimitReader(r, 0), closing: t.Close} + } + // TODO(petar): It may be a good idea, for extra robustness, to + // assume ContentLength=0 for GET requests (and other special + // cases?). This logic should be in fixLength(). 
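+ // In summary (an illustrative recap of the switch above, not new
+ // behavior), the wire framing maps to a body reader as follows:
+ //	Transfer-Encoding: chunked	-> newChunkedReader(r)
+ //	Content-Length: N (N >= 0)	-> io.LimitReader(r, N)
+ //	neither, Connection: close	-> r itself (read until EOF)
+ //	neither, keep-alive		-> io.LimitReader(r, 0) (empty body)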
+ } + + // Unify output + switch rr := msg.(type) { + case *Request: + rr.Body = t.Body + rr.ContentLength = t.ContentLength + rr.TransferEncoding = t.TransferEncoding + rr.Close = t.Close + rr.Trailer = t.Trailer + case *Response: + rr.Body = t.Body + rr.ContentLength = t.ContentLength + rr.TransferEncoding = t.TransferEncoding + rr.Close = t.Close + rr.Trailer = t.Trailer + } + + return nil +} + +// Checks whether chunked is part of the encodings stack +func chunked(te []string) bool { return len(te) > 0 && te[0] == "chunked" } + +// Sanitize transfer encoding +func fixTransferEncoding(header map[string]string) ([]string, os.Error) { + raw, present := header["Transfer-Encoding"] + if !present { + return nil, nil + } + + header["Transfer-Encoding"] = "", false + encodings := strings.Split(raw, ",", -1) + te := make([]string, 0, len(encodings)) + // TODO: Even though we only support "identity" and "chunked" + // encodings, the loop below is designed with foresight. One + // invariant that must be maintained is that, if present, + // chunked encoding must always come first. + for _, encoding := range encodings { + encoding = strings.ToLower(strings.TrimSpace(encoding)) + // "identity" encoding is not recorded + if encoding == "identity" { + break + } + if encoding != "chunked" { + return nil, &badStringError{"unsupported transfer encoding", encoding} + } + te = te[0 : len(te)+1] + te[len(te)-1] = encoding + } + if len(te) > 1 { + return nil, &badStringError{"too many transfer encodings", strings.Join(te, ",")} + } + if len(te) > 0 { + // Chunked encoding trumps Content-Length. See RFC 2616 + // Section 4.4. Currently len(te) > 0 implies chunked + // encoding. + header["Content-Length"] = "", false + return te, nil + } + + return nil, nil +} + +// Determine the expected body length, using RFC 2616 Section 4.4. This +// function is not a method, because ultimately it should be shared by +// ReadResponse and ReadRequest. +func fixLength(status int, requestMethod string, header map[string]string, te []string) (int64, os.Error) { + + // Logic based on response type or status + if noBodyExpected(requestMethod) { + return 0, nil + } + if status/100 == 1 { + return 0, nil + } + switch status { + case 204, 304: + return 0, nil + } + + // Logic based on Transfer-Encoding + if chunked(te) { + return -1, nil + } + + // Logic based on Content-Length + if cl, present := header["Content-Length"]; present { + cl = strings.TrimSpace(cl) + if cl != "" { + n, err := strconv.Atoi64(cl) + if err != nil || n < 0 { + return -1, &badStringError{"bad Content-Length", cl} + } + return n, nil + } else { + header["Content-Length"] = "", false + } + } + + // Logic based on media type. The purpose of the following code is just + // to detect whether the unsupported "multipart/byteranges" is being + // used. A proper Content-Type parser is needed in the future. 
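+ // For example, a "Content-Type: multipart/byteranges; boundary=x"
+ // header is caught by the substring test below and rejected with
+ // ErrNotSupported rather than being silently misread.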
+ if strings.Contains(strings.ToLower(header["Content-Type"]), "multipart/byteranges") { + return -1, ErrNotSupported + } + + // Body-EOF logic based on other methods (like closing, or chunked coding) + return -1, nil +} + +// Determine whether to hang up after sending a request and body, or +// receiving a response and body +// 'header' is the request headers +func shouldClose(major, minor int, header map[string]string) bool { + if major < 1 { + return true + } else if major == 1 && minor == 0 { + v, present := header["Connection"] + if !present { + return true + } + v = strings.ToLower(v) + if !strings.Contains(v, "keep-alive") { + return true + } + return false + } else if v, present := header["Connection"]; present { + // TODO: Should split on commas, toss surrounding white space, + // and check each field. + if v == "close" { + header["Connection"] = "", false + return true + } + } + return false +} + +// Parse the trailer header +func fixTrailer(header map[string]string, te []string) (map[string]string, os.Error) { + raw, present := header["Trailer"] + if !present { + return nil, nil + } + + header["Trailer"] = "", false + trailer := make(map[string]string) + keys := strings.Split(raw, ",", -1) + for _, key := range keys { + key = CanonicalHeaderKey(strings.TrimSpace(key)) + switch key { + case "Transfer-Encoding", "Trailer", "Content-Length": + return nil, &badStringError{"bad trailer key", key} + } + trailer[key] = "" + } + if len(trailer) == 0 { + return nil, nil + } + if !chunked(te) { + // Trailer and no chunking + return nil, ErrUnexpectedTrailer + } + return trailer, nil +} + +// body turns a Reader into a ReadCloser. +// Close ensures that the body has been fully read +// and then reads the trailer if necessary. +type body struct { + io.Reader + hdr interface{} // non-nil (Response or Request) value means read trailer + r *bufio.Reader // underlying wire-format reader for the trailer + closing bool // is the connection to be closed after reading body? +} + +func (b *body) Close() os.Error { + if b.hdr == nil && b.closing { + // no trailer and closing the connection next. + // no point in reading to EOF. + return nil + } + + trashBuf := make([]byte, 1024) // local for thread safety + for { + _, err := b.Read(trashBuf) + if err == nil { + continue + } + if err == os.EOF { + break + } + return err + } + if b.hdr == nil { // not reading trailer + return nil + } + + // TODO(petar): Put trailer reader code here + + return nil +} diff --git a/libgo/go/http/url.go b/libgo/go/http/url.go new file mode 100644 index 000000000..efd90d81e --- /dev/null +++ b/libgo/go/http/url.go @@ -0,0 +1,595 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Parse URLs (actually URIs, but that seems overly pedantic). +// RFC 3986 + +package http + +import ( + "os" + "strconv" + "strings" +) + +// URLError reports an error and the operation and URL that caused it. 
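+// For example, ParseURL("/a%zz") fails with a *URLError that prints as
+// `parse /a%zz: invalid URL escape "%zz"`.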
+type URLError struct { + Op string + URL string + Error os.Error +} + +func (e *URLError) String() string { return e.Op + " " + e.URL + ": " + e.Error.String() } + +func ishex(c byte) bool { + switch { + case '0' <= c && c <= '9': + return true + case 'a' <= c && c <= 'f': + return true + case 'A' <= c && c <= 'F': + return true + } + return false +} + +func unhex(c byte) byte { + switch { + case '0' <= c && c <= '9': + return c - '0' + case 'a' <= c && c <= 'f': + return c - 'a' + 10 + case 'A' <= c && c <= 'F': + return c - 'A' + 10 + } + return 0 +} + +type encoding int + +const ( + encodePath encoding = 1 + iota + encodeUserPassword + encodeQueryComponent + encodeFragment + encodeOpaque +) + + +type URLEscapeError string + +func (e URLEscapeError) String() string { + return "invalid URL escape " + strconv.Quote(string(e)) +} + +// Return true if the specified character should be escaped when +// appearing in a URL string, according to RFC 2396. +// The mode specifies which section of the URL the character appears in, +// since different sections reserve different characters. +func shouldEscape(c byte, mode encoding) bool { + // RFC 2396 §2.3 Unreserved characters (alphanum) + if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' { + return false + } + switch c { + case '-', '_', '.', '!', '~', '*', '\'', '(', ')': // §2.3 Unreserved characters (mark) + return false + + case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved) + // Different sections of the URL allow a few of + // the reserved characters to appear unescaped. + switch mode { + case encodePath: // §3.3 + // The RFC allows : @ & = + $ , but saves / ; for assigning + // meaning to individual path segments. This package + // only manipulates the path as a whole, so we allow those + // last two as well. Clients that need to distinguish between + // `/foo;y=z/bar` and `/foo%3by=z/bar` will have to re-decode RawPath. + // That leaves only ? to escape. + return c == '?' + + case encodeUserPassword: // §3.2.2 + // The RFC allows ; : & = + $ , in userinfo, so we must escape only @ and /. + // The parsing of userinfo treats : as special so we must escape that too. + return c == '@' || c == '/' || c == ':' + + case encodeQueryComponent: // §3.4 + // The RFC reserves (so we must escape) everything. + return true + + case encodeFragment: // §4.1 + // The RFC text is silent but the grammar allows + // everything, so escape nothing. + return false + + case encodeOpaque: // §3 opaque_part + // The RFC allows opaque_part to use all characters + // except that the leading / must be escaped. + // (We implement that case in String.) + return false + } + } + + // Everything else must be escaped. + return true +} + + +// URLUnescape unescapes a string in ``URL encoded'' form, +// converting %AB into the byte 0xAB and '+' into ' ' (space). +// It returns an error if any % is not followed +// by two hexadecimal digits. +// Despite the name, this encoding applies only to individual +// components of the query portion of the URL. +func URLUnescape(s string) (string, os.Error) { + return urlUnescape(s, encodeQueryComponent) +} + +// urlUnescape is like URLUnescape but mode specifies +// which section of the URL is being unescaped. +func urlUnescape(s string, mode encoding) (string, os.Error) { + // Count %, check that they're well-formed. 
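+ // For example, for s = "a%20b+c" in encodeQueryComponent mode the
+ // scan finds n == 1 well-formed escape and hasPlus == true, and the
+ // rewrite loop below produces "a b c".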
+ n := 0 + hasPlus := false + for i := 0; i < len(s); { + switch s[i] { + case '%': + n++ + if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) { + s = s[i:] + if len(s) > 3 { + s = s[0:3] + } + return "", URLEscapeError(s) + } + i += 3 + case '+': + hasPlus = mode == encodeQueryComponent + i++ + default: + i++ + } + } + + if n == 0 && !hasPlus { + return s, nil + } + + t := make([]byte, len(s)-2*n) + j := 0 + for i := 0; i < len(s); { + switch s[i] { + case '%': + t[j] = unhex(s[i+1])<<4 | unhex(s[i+2]) + j++ + i += 3 + case '+': + if mode == encodeQueryComponent { + t[j] = ' ' + } else { + t[j] = '+' + } + j++ + i++ + default: + t[j] = s[i] + j++ + i++ + } + } + return string(t), nil +} + +// URLEscape converts a string into ``URL encoded'' form. +// Despite the name, this encoding applies only to individual +// components of the query portion of the URL. +func URLEscape(s string) string { + return urlEscape(s, encodeQueryComponent) +} + +func urlEscape(s string, mode encoding) string { + spaceCount, hexCount := 0, 0 + for i := 0; i < len(s); i++ { + c := s[i] + if shouldEscape(c, mode) { + if c == ' ' && mode == encodeQueryComponent { + spaceCount++ + } else { + hexCount++ + } + } + } + + if spaceCount == 0 && hexCount == 0 { + return s + } + + t := make([]byte, len(s)+2*hexCount) + j := 0 + for i := 0; i < len(s); i++ { + switch c := s[i]; { + case c == ' ' && mode == encodeQueryComponent: + t[j] = '+' + j++ + case shouldEscape(c, mode): + t[j] = '%' + t[j+1] = "0123456789abcdef"[c>>4] + t[j+2] = "0123456789abcdef"[c&15] + j += 3 + default: + t[j] = s[i] + j++ + } + } + return string(t) +} + +// UnescapeUserinfo parses the RawUserinfo field of a URL +// as the form user or user:password and unescapes and returns +// the two halves. +// +// This functionality should only be used with legacy web sites. +// RFC 2396 warns that interpreting Userinfo this way +// ``is NOT RECOMMENDED, because the passing of authentication +// information in clear text (such as URI) has proven to be a +// security risk in almost every case where it has been used.'' +func UnescapeUserinfo(rawUserinfo string) (user, password string, err os.Error) { + u, p := split(rawUserinfo, ':', true) + if user, err = urlUnescape(u, encodeUserPassword); err != nil { + return "", "", err + } + if password, err = urlUnescape(p, encodeUserPassword); err != nil { + return "", "", err + } + return +} + +// EscapeUserinfo combines user and password in the form +// user:password (or just user if password is empty) and then +// escapes it for use as the URL.RawUserinfo field. +// +// This functionality should only be used with legacy web sites. +// RFC 2396 warns that interpreting Userinfo this way +// ``is NOT RECOMMENDED, because the passing of authentication +// information in clear text (such as URI) has proven to be a +// security risk in almost every case where it has been used.'' +func EscapeUserinfo(user, password string) string { + raw := urlEscape(user, encodeUserPassword) + if password != "" { + raw += ":" + urlEscape(password, encodeUserPassword) + } + return raw +} + +// A URL represents a parsed URL (technically, a URI reference). +// The general form represented is: +// scheme://[userinfo@]host/path[?query][#fragment] +// The Raw, RawAuthority, RawPath, and RawQuery fields are in "wire format" +// (special characters must be hex-escaped if not meant to have special meaning). +// All other fields are logical values; '+' or '%' represent themselves. 
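+// For example, a RawPath of "/a%20b" corresponds to a Path of "/a b".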
+// +// The various Raw values are supplied in wire format because +// clients typically have to split them into pieces before further +// decoding. +type URL struct { + Raw string // the original string + Scheme string // scheme + RawAuthority string // [userinfo@]host + RawUserinfo string // userinfo + Host string // host + RawPath string // /path[?query][#fragment] + Path string // /path + OpaquePath bool // path is opaque (unrooted when scheme is present) + RawQuery string // query + Fragment string // fragment +} + +// Maybe rawurl is of the form scheme:path. +// (Scheme must be [a-zA-Z][a-zA-Z0-9+-.]*) +// If so, return scheme, path; else return "", rawurl. +func getscheme(rawurl string) (scheme, path string, err os.Error) { + for i := 0; i < len(rawurl); i++ { + c := rawurl[i] + switch { + case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z': + // do nothing + case '0' <= c && c <= '9' || c == '+' || c == '-' || c == '.': + if i == 0 { + return "", rawurl, nil + } + case c == ':': + if i == 0 { + return "", "", os.ErrorString("missing protocol scheme") + } + return rawurl[0:i], rawurl[i+1:], nil + default: + // we have encountered an invalid character, + // so there is no valid scheme + return "", rawurl, nil + } + } + return "", rawurl, nil +} + +// Maybe s is of the form t c u. +// If so, return t, c u (or t, u if cutc == true). +// If not, return s, "". +func split(s string, c byte, cutc bool) (string, string) { + for i := 0; i < len(s); i++ { + if s[i] == c { + if cutc { + return s[0:i], s[i+1:] + } + return s[0:i], s[i:] + } + } + return s, "" +} + +// ParseURL parses rawurl into a URL structure. +// The string rawurl is assumed not to have a #fragment suffix. +// (Web browsers strip #fragment before sending the URL to a web server.) +// The rawurl may be relative or absolute. +func ParseURL(rawurl string) (url *URL, err os.Error) { + return parseURL(rawurl, false) +} + +// ParseRequestURL parses rawurl into a URL structure. It assumes that +// rawurl was received from an HTTP request, so the rawurl is interpreted +// only as an absolute URI or an absolute path. +// The string rawurl is assumed not to have a #fragment suffix. +// (Web browsers strip #fragment before sending the URL to a web server.) +func ParseRequestURL(rawurl string) (url *URL, err os.Error) { + return parseURL(rawurl, true) +} + +// parseURL parses a URL from a string in one of two contexts. If +// viaRequest is true, the URL is assumed to have arrived via an HTTP request, +// in which case only absolute URLs or path-absolute relative URLs are allowed. +// If viaRequest is false, all forms of relative URLs are allowed. +func parseURL(rawurl string, viaRequest bool) (url *URL, err os.Error) { + if rawurl == "" { + err = os.ErrorString("empty url") + goto Error + } + url = new(URL) + url.Raw = rawurl + + // Split off possible leading "http:", "mailto:", etc. + // Cannot contain escaped characters. + var path string + if url.Scheme, path, err = getscheme(rawurl); err != nil { + goto Error + } + + leadingSlash := strings.HasPrefix(path, "/") + + if url.Scheme != "" && !leadingSlash { + // RFC 2396: + // Absolute URI (has scheme) with non-rooted path + // is uninterpreted. It doesn't even have a ?query. + // This is the case that handles mailto:name@example.com. 
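+ // For example, "mailto:name@example.com" yields Scheme "mailto" and
+ // Path "name@example.com", with OpaquePath set below.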
+ url.RawPath = path + + if url.Path, err = urlUnescape(path, encodeOpaque); err != nil { + goto Error + } + url.OpaquePath = true + } else { + if viaRequest && !leadingSlash { + err = os.ErrorString("invalid URI for request") + goto Error + } + + // Split off query before parsing path further. + url.RawPath = path + path, query := split(path, '?', false) + if len(query) > 1 { + url.RawQuery = query[1:] + } + + // Maybe path is //authority/path + if (url.Scheme != "" || !viaRequest) && + strings.HasPrefix(path, "//") && !strings.HasPrefix(path, "///") { + url.RawAuthority, path = split(path[2:], '/', false) + url.RawPath = url.RawPath[2+len(url.RawAuthority):] + } + + // Split authority into userinfo@host. + // If there's no @, split's default is wrong. Check explicitly. + var rawHost string + if strings.Index(url.RawAuthority, "@") < 0 { + rawHost = url.RawAuthority + } else { + url.RawUserinfo, rawHost = split(url.RawAuthority, '@', true) + } + + // We leave RawAuthority only in raw form because clients + // of common protocols should be using Userinfo and Host + // instead. Clients that wish to use RawAuthority will have to + // interpret it themselves: RFC 2396 does not define the meaning. + + if strings.Contains(rawHost, "%") { + // Host cannot contain escaped characters. + err = os.ErrorString("hexadecimal escape in host") + goto Error + } + url.Host = rawHost + + if url.Path, err = urlUnescape(path, encodePath); err != nil { + goto Error + } + } + return url, nil + +Error: + return nil, &URLError{"parse", rawurl, err} + +} + +// ParseURLReference is like ParseURL but allows a trailing #fragment. +func ParseURLReference(rawurlref string) (url *URL, err os.Error) { + // Cut off #frag. + rawurl, frag := split(rawurlref, '#', false) + if url, err = ParseURL(rawurl); err != nil { + return nil, err + } + url.Raw += frag + url.RawPath += frag + if len(frag) > 1 { + frag = frag[1:] + if url.Fragment, err = urlUnescape(frag, encodeFragment); err != nil { + return nil, &URLError{"parse", rawurl, err} + } + } + return url, nil +} + +// String reassembles url into a valid URL string. +// +// There are redundant fields stored in the URL structure: +// the String method consults Scheme, Path, Host, RawUserinfo, +// RawQuery, and Fragment, but not Raw, RawPath or Authority. +func (url *URL) String() string { + result := "" + if url.Scheme != "" { + result += url.Scheme + ":" + } + if url.Host != "" || url.RawUserinfo != "" { + result += "//" + if url.RawUserinfo != "" { + // hide the password, if any + info := url.RawUserinfo + if i := strings.Index(info, ":"); i >= 0 { + info = info[0:i] + ":******" + } + result += info + "@" + } + result += url.Host + } + if url.OpaquePath { + path := url.Path + if strings.HasPrefix(path, "/") { + result += "%2f" + path = path[1:] + } + result += urlEscape(path, encodeOpaque) + } else { + result += urlEscape(url.Path, encodePath) + } + if url.RawQuery != "" { + result += "?" + url.RawQuery + } + if url.Fragment != "" { + result += "#" + urlEscape(url.Fragment, encodeFragment) + } + return result +} + +// EncodeQuery encodes the query represented as a multimap. +func EncodeQuery(m map[string][]string) string { + parts := make([]string, 0, len(m)) // will be large enough for most uses + for k, vs := range m { + prefix := URLEscape(k) + "=" + for _, v := range vs { + parts = append(parts, prefix+URLEscape(v)) + } + } + return strings.Join(parts, "&") +} + +// resolvePath applies special path segments from refs and applies +// them to base, per RFC 2396. 
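+// For example, resolvePath("a/b/c", "..") == "a/" and
+// resolvePath("a/b/c", "../d") == "a/d"; see resolvePathTests in
+// url_test.go for more cases.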
+func resolvePath(basepath string, refpath string) string { + base := strings.Split(basepath, "/", -1) + refs := strings.Split(refpath, "/", -1) + if len(base) == 0 { + base = []string{""} + } + for idx, ref := range refs { + switch { + case ref == ".": + base[len(base)-1] = "" + case ref == "..": + newLen := len(base) - 1 + if newLen < 1 { + newLen = 1 + } + base = base[0:newLen] + base[len(base)-1] = "" + default: + if idx == 0 || base[len(base)-1] == "" { + base[len(base)-1] = ref + } else { + base = append(base, ref) + } + } + } + return strings.Join(base, "/") +} + +// IsAbs returns true if the URL is absolute. +func (url *URL) IsAbs() bool { + return url.Scheme != "" +} + +// ParseURL parses a URL in the context of a base URL. The URL in ref +// may be relative or absolute. ParseURL returns nil, err on parse +// failure, otherwise its return value is the same as ResolveReference. +func (base *URL) ParseURL(ref string) (*URL, os.Error) { + refurl, err := ParseURL(ref) + if err != nil { + return nil, err + } + return base.ResolveReference(refurl), nil +} + +// ResolveReference resolves a URI reference to an absolute URI from +// an absolute base URI, per RFC 2396 Section 5.2. The URI reference +// may be relative or absolute. ResolveReference always returns a new +// URL instance, even if the returned URL is identical to either the +// base or reference. If ref is an absolute URL, then ResolveReference +// ignores base and returns a copy of ref. +func (base *URL) ResolveReference(ref *URL) *URL { + url := new(URL) + switch { + case ref.IsAbs(): + *url = *ref + default: + // relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] + *url = *base + if ref.RawAuthority != "" { + // The "net_path" case. + url.RawAuthority = ref.RawAuthority + url.Host = ref.Host + url.RawUserinfo = ref.RawUserinfo + } + switch { + case url.OpaquePath: + url.Path = ref.Path + url.RawPath = ref.RawPath + url.RawQuery = ref.RawQuery + case strings.HasPrefix(ref.Path, "/"): + // The "abs_path" case. + url.Path = ref.Path + url.RawPath = ref.RawPath + url.RawQuery = ref.RawQuery + default: + // The "rel_path" case. + path := resolvePath(base.Path, ref.Path) + if !strings.HasPrefix(path, "/") { + path = "/" + path + } + url.Path = path + url.RawPath = url.Path + url.RawQuery = ref.RawQuery + if ref.RawQuery != "" { + url.RawPath += "?" + url.RawQuery + } + } + + url.Fragment = ref.Fragment + } + url.Raw = url.String() + return url +} diff --git a/libgo/go/http/url_test.go b/libgo/go/http/url_test.go new file mode 100644 index 000000000..0801f7ff3 --- /dev/null +++ b/libgo/go/http/url_test.go @@ -0,0 +1,675 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package http + +import ( + "fmt" + "os" + "reflect" + "testing" +) + +// TODO(rsc): +// test URLUnescape +// test URLEscape +// test ParseURL + +type URLTest struct { + in string + out *URL + roundtrip string // expected result of reserializing the URL; empty means same as "in". 
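+ // (in is fed to the parse function under test, out is compared
+ // structurally in DoTest, and roundtrip is checked by DoTestString.)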
+} + +var urltests = []URLTest{ + // no path + { + "http://www.google.com", + &URL{ + Raw: "http://www.google.com", + Scheme: "http", + RawAuthority: "www.google.com", + Host: "www.google.com", + }, + "", + }, + // path + { + "http://www.google.com/", + &URL{ + Raw: "http://www.google.com/", + Scheme: "http", + RawAuthority: "www.google.com", + Host: "www.google.com", + RawPath: "/", + Path: "/", + }, + "", + }, + // path with hex escaping + { + "http://www.google.com/file%20one%26two", + &URL{ + Raw: "http://www.google.com/file%20one%26two", + Scheme: "http", + RawAuthority: "www.google.com", + Host: "www.google.com", + RawPath: "/file%20one%26two", + Path: "/file one&two", + }, + "http://www.google.com/file%20one&two", + }, + // user + { + "ftp://webmaster@www.google.com/", + &URL{ + Raw: "ftp://webmaster@www.google.com/", + Scheme: "ftp", + RawAuthority: "webmaster@www.google.com", + RawUserinfo: "webmaster", + Host: "www.google.com", + RawPath: "/", + Path: "/", + }, + "", + }, + // escape sequence in username + { + "ftp://john%20doe@www.google.com/", + &URL{ + Raw: "ftp://john%20doe@www.google.com/", + Scheme: "ftp", + RawAuthority: "john%20doe@www.google.com", + RawUserinfo: "john%20doe", + Host: "www.google.com", + RawPath: "/", + Path: "/", + }, + "ftp://john%20doe@www.google.com/", + }, + // query + { + "http://www.google.com/?q=go+language", + &URL{ + Raw: "http://www.google.com/?q=go+language", + Scheme: "http", + RawAuthority: "www.google.com", + Host: "www.google.com", + RawPath: "/?q=go+language", + Path: "/", + RawQuery: "q=go+language", + }, + "", + }, + // query with hex escaping: NOT parsed + { + "http://www.google.com/?q=go%20language", + &URL{ + Raw: "http://www.google.com/?q=go%20language", + Scheme: "http", + RawAuthority: "www.google.com", + Host: "www.google.com", + RawPath: "/?q=go%20language", + Path: "/", + RawQuery: "q=go%20language", + }, + "", + }, + // %20 outside query + { + "http://www.google.com/a%20b?q=c+d", + &URL{ + Raw: "http://www.google.com/a%20b?q=c+d", + Scheme: "http", + RawAuthority: "www.google.com", + Host: "www.google.com", + RawPath: "/a%20b?q=c+d", + Path: "/a b", + RawQuery: "q=c+d", + }, + "", + }, + // path without leading /, so no query parsing + { + "http:www.google.com/?q=go+language", + &URL{ + Raw: "http:www.google.com/?q=go+language", + Scheme: "http", + RawPath: "www.google.com/?q=go+language", + Path: "www.google.com/?q=go+language", + OpaquePath: true, + }, + "http:www.google.com/?q=go+language", + }, + // path without leading /, so no query parsing + { + "http:%2f%2fwww.google.com/?q=go+language", + &URL{ + Raw: "http:%2f%2fwww.google.com/?q=go+language", + Scheme: "http", + RawPath: "%2f%2fwww.google.com/?q=go+language", + Path: "//www.google.com/?q=go+language", + OpaquePath: true, + }, + "http:%2f/www.google.com/?q=go+language", + }, + // non-authority + { + "mailto:/webmaster@golang.org", + &URL{ + Raw: "mailto:/webmaster@golang.org", + Scheme: "mailto", + RawPath: "/webmaster@golang.org", + Path: "/webmaster@golang.org", + }, + "", + }, + // non-authority + { + "mailto:webmaster@golang.org", + &URL{ + Raw: "mailto:webmaster@golang.org", + Scheme: "mailto", + RawPath: "webmaster@golang.org", + Path: "webmaster@golang.org", + OpaquePath: true, + }, + "", + }, + // unescaped :// in query should not create a scheme + { + "/foo?query=http://bad", + &URL{ + Raw: "/foo?query=http://bad", + RawPath: "/foo?query=http://bad", + Path: "/foo", + RawQuery: "query=http://bad", + }, + "", + }, + // leading // without scheme should create 
an authority + { + "//foo", + &URL{ + RawAuthority: "foo", + Raw: "//foo", + Host: "foo", + Scheme: "", + RawPath: "", + Path: "", + }, + "", + }, + // leading // without scheme, with userinfo, path, and query + { + "//user@foo/path?a=b", + &URL{ + Raw: "//user@foo/path?a=b", + RawAuthority: "user@foo", + RawUserinfo: "user", + Scheme: "", + RawPath: "/path?a=b", + Path: "/path", + RawQuery: "a=b", + Host: "foo", + }, + "", + }, + // Three leading slashes isn't an authority, but doesn't return an error. + // (We can't return an error, as this code is also used via + // ServeHTTP -> ReadRequest -> ParseURL, which is arguably a + // different URL parsing context, but currently shares the + // same codepath) + { + "///threeslashes", + &URL{ + RawAuthority: "", + Raw: "///threeslashes", + Host: "", + Scheme: "", + RawPath: "///threeslashes", + Path: "///threeslashes", + }, + "", + }, + { + "http://user:password@google.com", + &URL{ + Raw: "http://user:password@google.com", + Scheme: "http", + RawAuthority: "user:password@google.com", + RawUserinfo: "user:password", + Host: "google.com", + }, + "http://user:******@google.com", + }, + { + "http://user:longerpass@google.com", + &URL{ + Raw: "http://user:longerpass@google.com", + Scheme: "http", + RawAuthority: "user:longerpass@google.com", + RawUserinfo: "user:longerpass", + Host: "google.com", + }, + "http://user:******@google.com", + }, +} + +var urlnofragtests = []URLTest{ + { + "http://www.google.com/?q=go+language#foo", + &URL{ + Raw: "http://www.google.com/?q=go+language#foo", + Scheme: "http", + RawAuthority: "www.google.com", + Host: "www.google.com", + RawPath: "/?q=go+language#foo", + Path: "/", + RawQuery: "q=go+language#foo", + }, + "", + }, +} + +var urlfragtests = []URLTest{ + { + "http://www.google.com/?q=go+language#foo", + &URL{ + Raw: "http://www.google.com/?q=go+language#foo", + Scheme: "http", + RawAuthority: "www.google.com", + Host: "www.google.com", + RawPath: "/?q=go+language#foo", + Path: "/", + RawQuery: "q=go+language", + Fragment: "foo", + }, + "", + }, + { + "http://www.google.com/?q=go+language#foo%26bar", + &URL{ + Raw: "http://www.google.com/?q=go+language#foo%26bar", + Scheme: "http", + RawAuthority: "www.google.com", + Host: "www.google.com", + RawPath: "/?q=go+language#foo%26bar", + Path: "/", + RawQuery: "q=go+language", + Fragment: "foo&bar", + }, + "http://www.google.com/?q=go+language#foo&bar", + }, +} + +// more useful string for debugging than fmt's struct printer +func ufmt(u *URL) string { + return fmt.Sprintf("raw=%q, scheme=%q, rawpath=%q, auth=%q, userinfo=%q, host=%q, path=%q, rawq=%q, frag=%q", + u.Raw, u.Scheme, u.RawPath, u.RawAuthority, u.RawUserinfo, + u.Host, u.Path, u.RawQuery, u.Fragment) +} + +func DoTest(t *testing.T, parse func(string) (*URL, os.Error), name string, tests []URLTest) { + for _, tt := range tests { + u, err := parse(tt.in) + if err != nil { + t.Errorf("%s(%q) returned error %s", name, tt.in, err) + continue + } + if !reflect.DeepEqual(u, tt.out) { + t.Errorf("%s(%q):\n\thave %v\n\twant %v\n", + name, tt.in, ufmt(u), ufmt(tt.out)) + } + } +} + +func TestParseURL(t *testing.T) { + DoTest(t, ParseURL, "ParseURL", urltests) + DoTest(t, ParseURL, "ParseURL", urlnofragtests) +} + +func TestParseURLReference(t *testing.T) { + DoTest(t, ParseURLReference, "ParseURLReference", urltests) + DoTest(t, ParseURLReference, "ParseURLReference", urlfragtests) +} + +const pathThatLooksSchemeRelative = "//not.a.user@not.a.host/just/a/path" + +var parseRequestUrlTests = []struct { + url string 
+ expectedValid bool +}{ + {"http://foo.com", true}, + {"http://foo.com/", true}, + {"http://foo.com/path", true}, + {"/", true}, + {pathThatLooksSchemeRelative, true}, + {"//not.a.user@%66%6f%6f.com/just/a/path/also", true}, + {"foo.html", false}, + {"../dir/", false}, +} + +func TestParseRequestURL(t *testing.T) { + for _, test := range parseRequestUrlTests { + _, err := ParseRequestURL(test.url) + valid := err == nil + if valid != test.expectedValid { + t.Errorf("Expected valid=%v for %q; got %v", test.expectedValid, test.url, valid) + } + } + + url, err := ParseRequestURL(pathThatLooksSchemeRelative) + if err != nil { + t.Fatalf("Unexpected error %v", err) + } + if url.Path != pathThatLooksSchemeRelative { + t.Errorf("Expected path %q; got %q", pathThatLooksSchemeRelative, url.Path) + } +} + +func DoTestString(t *testing.T, parse func(string) (*URL, os.Error), name string, tests []URLTest) { + for _, tt := range tests { + u, err := parse(tt.in) + if err != nil { + t.Errorf("%s(%q) returned error %s", name, tt.in, err) + continue + } + s := u.String() + expected := tt.in + if len(tt.roundtrip) > 0 { + expected = tt.roundtrip + } + if s != expected { + t.Errorf("%s(%q).String() == %q (expected %q)", name, tt.in, s, expected) + } + } +} + +func TestURLString(t *testing.T) { + DoTestString(t, ParseURL, "ParseURL", urltests) + DoTestString(t, ParseURL, "ParseURL", urlnofragtests) + DoTestString(t, ParseURLReference, "ParseURLReference", urltests) + DoTestString(t, ParseURLReference, "ParseURLReference", urlfragtests) +} + +type URLEscapeTest struct { + in string + out string + err os.Error +} + +var unescapeTests = []URLEscapeTest{ + { + "", + "", + nil, + }, + { + "abc", + "abc", + nil, + }, + { + "1%41", + "1A", + nil, + }, + { + "1%41%42%43", + "1ABC", + nil, + }, + { + "%4a", + "J", + nil, + }, + { + "%6F", + "o", + nil, + }, + { + "%", // not enough characters after % + "", + URLEscapeError("%"), + }, + { + "%a", // not enough characters after % + "", + URLEscapeError("%a"), + }, + { + "%1", // not enough characters after % + "", + URLEscapeError("%1"), + }, + { + "123%45%6", // not enough characters after % + "", + URLEscapeError("%6"), + }, + { + "%zzzzz", // invalid hex digits + "", + URLEscapeError("%zz"), + }, +} + +func TestURLUnescape(t *testing.T) { + for _, tt := range unescapeTests { + actual, err := URLUnescape(tt.in) + if actual != tt.out || (err != nil) != (tt.err != nil) { + t.Errorf("URLUnescape(%q) = %q, %s; want %q, %s", tt.in, actual, err, tt.out, tt.err) + } + } +} + +var escapeTests = []URLEscapeTest{ + { + "", + "", + nil, + }, + { + "abc", + "abc", + nil, + }, + { + "one two", + "one+two", + nil, + }, + { + "10%", + "10%25", + nil, + }, + { + " ?&=#+%!<>#\"{}|\\^[]`☺\t", + "+%3f%26%3d%23%2b%25!%3c%3e%23%22%7b%7d%7c%5c%5e%5b%5d%60%e2%98%ba%09", + nil, + }, +} + +func TestURLEscape(t *testing.T) { + for _, tt := range escapeTests { + actual := URLEscape(tt.in) + if tt.out != actual { + t.Errorf("URLEscape(%q) = %q, want %q", tt.in, actual, tt.out) + } + + // for bonus points, verify that escape:unescape is an identity. 
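+ // e.g. URLEscape("one two") == "one+two" and
+ // URLUnescape("one+two") == "one two", recovering the input.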
+ roundtrip, err := URLUnescape(actual) + if roundtrip != tt.in || err != nil { + t.Errorf("URLUnescape(%q) = %q, %s; want %q, %s", actual, roundtrip, err, tt.in, "[no error]") + } + } +} + +type UserinfoTest struct { + User string + Password string + Raw string +} + +var userinfoTests = []UserinfoTest{ + {"user", "password", "user:password"}, + {"foo:bar", "~!@#$%^&*()_+{}|[]\\-=`:;'\"<>?,./", + "foo%3abar:~!%40%23$%25%5e&*()_+%7b%7d%7c%5b%5d%5c-=%60%3a;'%22%3c%3e?,.%2f"}, +} + +func TestEscapeUserinfo(t *testing.T) { + for _, tt := range userinfoTests { + if raw := EscapeUserinfo(tt.User, tt.Password); raw != tt.Raw { + t.Errorf("EscapeUserinfo(%q, %q) = %q, want %q", tt.User, tt.Password, raw, tt.Raw) + } + } +} + +func TestUnescapeUserinfo(t *testing.T) { + for _, tt := range userinfoTests { + if user, pass, err := UnescapeUserinfo(tt.Raw); user != tt.User || pass != tt.Password || err != nil { + t.Errorf("UnescapeUserinfo(%q) = %q, %q, %v, want %q, %q, nil", tt.Raw, user, pass, err, tt.User, tt.Password) + } + } +} + +type qMap map[string][]string + +type EncodeQueryTest struct { + m qMap + expected string + expected1 string +} + +var encodeQueryTests = []EncodeQueryTest{ + {nil, "", ""}, + {qMap{"q": {"puppies"}, "oe": {"utf8"}}, "q=puppies&oe=utf8", "oe=utf8&q=puppies"}, + {qMap{"q": {"dogs", "&", "7"}}, "q=dogs&q=%26&q=7", "q=dogs&q=%26&q=7"}, +} + +func TestEncodeQuery(t *testing.T) { + for _, tt := range encodeQueryTests { + if q := EncodeQuery(tt.m); q != tt.expected && q != tt.expected1 { + t.Errorf(`EncodeQuery(%+v) = %q, want %q`, tt.m, q, tt.expected) + } + } +} + +var resolvePathTests = []struct { + base, ref, expected string +}{ + {"a/b", ".", "a/"}, + {"a/b", "c", "a/c"}, + {"a/b", "..", ""}, + {"a/", "..", ""}, + {"a/", "../..", ""}, + {"a/b/c", "..", "a/"}, + {"a/b/c", "../d", "a/d"}, + {"a/b/c", ".././d", "a/d"}, + {"a/b", "./..", ""}, + {"a/./b", ".", "a/./"}, + {"a/../", ".", "a/../"}, + {"a/.././b", "c", "a/.././c"}, +} + +func TestResolvePath(t *testing.T) { + for _, test := range resolvePathTests { + got := resolvePath(test.base, test.ref) + if got != test.expected { + t.Errorf("For %q + %q got %q; expected %q", test.base, test.ref, got, test.expected) + } + } +} + +var resolveReferenceTests = []struct { + base, rel, expected string +}{ + // Absolute URL references + {"http://foo.com?a=b", "https://bar.com/", "https://bar.com/"}, + {"http://foo.com/", "https://bar.com/?a=b", "https://bar.com/?a=b"}, + {"http://foo.com/bar", "mailto:foo@example.com", "mailto:foo@example.com"}, + + // Path-absolute references + {"http://foo.com/bar", "/baz", "http://foo.com/baz"}, + {"http://foo.com/bar?a=b#f", "/baz", "http://foo.com/baz"}, + {"http://foo.com/bar?a=b", "/baz?c=d", "http://foo.com/baz?c=d"}, + + // Scheme-relative + {"https://foo.com/bar?a=b", "//bar.com/quux", "https://bar.com/quux"}, + + // Path-relative references: + + // ... current directory + {"http://foo.com", ".", "http://foo.com/"}, + {"http://foo.com/bar", ".", "http://foo.com/"}, + {"http://foo.com/bar/", ".", "http://foo.com/bar/"}, + + // ... going down + {"http://foo.com", "bar", "http://foo.com/bar"}, + {"http://foo.com/", "bar", "http://foo.com/bar"}, + {"http://foo.com/bar/baz", "quux", "http://foo.com/bar/quux"}, + + // ... going up + {"http://foo.com/bar/baz", "../quux", "http://foo.com/quux"}, + {"http://foo.com/bar/baz", "../../../../../quux", "http://foo.com/quux"}, + {"http://foo.com/bar", "..", "http://foo.com/"}, + {"http://foo.com/bar/baz", "./..", "http://foo.com/"}, + + // "." and ".." 
in the base aren't special + {"http://foo.com/dot/./dotdot/../foo/bar", "../baz", "http://foo.com/dot/./dotdot/../baz"}, + + // Triple dot isn't special + {"http://foo.com/bar", "...", "http://foo.com/..."}, + + // Fragment + {"http://foo.com/bar", ".#frag", "http://foo.com/#frag"}, +} + +func TestResolveReference(t *testing.T) { + mustParseURL := func(url string) *URL { + u, err := ParseURLReference(url) + if err != nil { + t.Fatalf("Expected URL to parse: %q, got error: %v", url, err) + } + return u + } + for _, test := range resolveReferenceTests { + base := mustParseURL(test.base) + rel := mustParseURL(test.rel) + url := base.ResolveReference(rel) + urlStr := url.String() + if urlStr != test.expected { + t.Errorf("Resolving %q + %q != %q; got %q", test.base, test.rel, test.expected, urlStr) + } + } + + // Test that new instances are returned. + base := mustParseURL("http://foo.com/") + abs := base.ResolveReference(mustParseURL(".")) + if base == abs { + t.Errorf("Expected no-op reference to return new URL instance.") + } + barRef := mustParseURL("http://bar.com/") + abs = base.ResolveReference(barRef) + if abs == barRef { + t.Errorf("Expected resolution of absolute reference to return new URL instance.") + } + + // Test the convenience wrapper too + base = mustParseURL("http://foo.com/path/one/") + abs, _ = base.ParseURL("../two") + expected := "http://foo.com/path/two" + if abs.String() != expected { + t.Errorf("ParseURL wrapper got %q; expected %q", abs.String(), expected) + } + _, err := base.ParseURL("") + if err == nil { + t.Errorf("Expected an error from ParseURL wrapper parsing an empty string.") + } + +}