Diffstat (limited to 'libgo/go/http/request.go')
-rw-r--r--  libgo/go/http/request.go  693
1 files changed, 693 insertions, 0 deletions
diff --git a/libgo/go/http/request.go b/libgo/go/http/request.go
new file mode 100644
index 000000000..04bebaaf5
--- /dev/null
+++ b/libgo/go/http/request.go
@@ -0,0 +1,693 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// HTTP Request reading and parsing.
+
+// The http package implements parsing of HTTP requests, replies,
+// and URLs and provides an extensible HTTP server and a basic
+// HTTP client.
+package http
+
+import (
+ "bufio"
+ "bytes"
+ "container/vector"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "mime"
+ "mime/multipart"
+ "os"
+ "strconv"
+ "strings"
+)
+
+const (
+ maxLineLength = 4096 // assumed <= bufio.defaultBufSize
+ maxValueLength = 4096
+ maxHeaderLines = 1024
+ chunkSize = 4 << 10 // 4 KB chunks
+)
+
+// HTTP request parsing errors.
+type ProtocolError struct {
+ os.ErrorString
+}
+
+var (
+ ErrLineTooLong = &ProtocolError{"header line too long"}
+ ErrHeaderTooLong = &ProtocolError{"header too long"}
+ ErrShortBody = &ProtocolError{"entity body too short"}
+ ErrNotSupported = &ProtocolError{"feature not supported"}
+ ErrUnexpectedTrailer = &ProtocolError{"trailer header without chunked transfer encoding"}
+ ErrMissingContentLength = &ProtocolError{"missing ContentLength in HEAD response"}
+ ErrNotMultipart = &ProtocolError{"request Content-Type isn't multipart/form-data"}
+ ErrMissingBoundary = &ProtocolError{"no multipart boundary param in Content-Type"}
+)
+
+type badStringError struct {
+ what string
+ str string
+}
+
+func (e *badStringError) String() string { return fmt.Sprintf("%s %q", e.what, e.str) }
+
+var reqExcludeHeader = map[string]bool{
+ "Host": true,
+ "User-Agent": true,
+ "Referer": true,
+ "Content-Length": true,
+ "Transfer-Encoding": true,
+ "Trailer": true,
+}
+
+// A Request represents a parsed HTTP request header.
+type Request struct {
+ Method string // GET, POST, PUT, etc.
+ RawURL string // The raw URL given in the request.
+ URL *URL // Parsed URL.
+ Proto string // "HTTP/1.0"
+ ProtoMajor int // 1
+ ProtoMinor int // 0
+
+ // A header maps header keys to their values.
+ // If the header says
+ //
+ // accept-encoding: gzip, deflate
+ // Accept-Language: en-us
+ // Connection: keep-alive
+ //
+ // then
+ //
+ // Header = map[string]string{
+ // "Accept-Encoding": "gzip, deflate",
+ // "Accept-Language": "en-us",
+ // "Connection": "keep-alive",
+ // }
+ //
+ // HTTP defines that header names are case-insensitive.
+ // The request parser implements this by canonicalizing the
+ // name, making the first character and any characters
+ // following a hyphen uppercase and the rest lowercase.
+ Header map[string]string
+
+ // The message body.
+ Body io.ReadCloser
+
+ // ContentLength records the length of the associated content.
+ // The value -1 indicates that the length is unknown.
+ // Values >= 0 indicate that the given number of bytes may be read from Body.
+ ContentLength int64
+
+ // TransferEncoding lists the transfer encodings from outermost to innermost.
+ // An empty list denotes the "identity" encoding.
+ TransferEncoding []string
+
+ // Whether to close the connection after replying to this request.
+ Close bool
+
+ // The host on which the URL is sought.
+ // Per RFC 2616, this is either the value of the Host: header
+ // or the host name given in the URL itself.
+ Host string
+
+ // The referring URL, if sent in the request.
+ //
+ // Referer is misspelled as in the request itself,
+ // a mistake from the earliest days of HTTP.
+ // This value can also be fetched from the Header map
+ // as Header["Referer"]; the benefit of making it
+ // available as a structure field is that the compiler
+ // can diagnose programs that use the alternate
+ // (correct English) spelling req.Referrer but cannot
+ // diagnose programs that use Header["Referrer"].
+ Referer string
+
+ // The User-Agent: header string, if sent in the request.
+ UserAgent string
+
+ // The parsed form. Only available after ParseForm is called.
+ Form map[string][]string
+
+ // Trailer maps trailer keys to values. As with Header, if the
+ // request has multiple trailer lines with the same key, they will be
+ // concatenated, delimited by commas.
+ Trailer map[string]string
+}
+
+// ProtoAtLeast returns whether the HTTP protocol used
+// in the request is at least major.minor.
+func (r *Request) ProtoAtLeast(major, minor int) bool {
+ return r.ProtoMajor > major ||
+ r.ProtoMajor == major && r.ProtoMinor >= minor
+}
+
+// MultipartReader returns a MIME multipart reader if this is a
+// multipart/form-data POST request, else returns nil and an error.
+func (r *Request) MultipartReader() (multipart.Reader, os.Error) {
+ v, ok := r.Header["Content-Type"]
+ if !ok {
+ return nil, ErrNotMultipart
+ }
+ d, params := mime.ParseMediaType(v)
+ if d != "multipart/form-data" {
+ return nil, ErrNotMultipart
+ }
+ boundary, ok := params["boundary"]
+ if !ok {
+ return nil, ErrMissingBoundary
+ }
+ return multipart.NewReader(r.Body, boundary), nil
+}
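+
+// exampleMultipartReader is an illustrative sketch, not part of the original
+// source: it shows how a handler might pull the first part out of a
+// multipart/form-data POST. The NextPart/FormName calls assumed here follow
+// the mime/multipart package of this era.
+func exampleMultipartReader(req *Request) {
+ mr, err := req.MultipartReader()
+ if err != nil {
+ fmt.Println("not multipart:", err)
+ return
+ }
+ part, err := mr.NextPart()
+ if part == nil || err != nil {
+ fmt.Println("no parts:", err)
+ return
+ }
+ fmt.Println("first part field:", part.FormName())
+}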
+
+// Return value if nonempty, def otherwise.
+func valueOrDefault(value, def string) string {
+ if value != "" {
+ return value
+ }
+ return def
+}
+
+const defaultUserAgent = "Go http package"
+
+// Write writes an HTTP/1.1 request -- header and body -- in wire format.
+// This method consults the following fields of req:
+// Host
+// RawURL, if non-empty, or else URL
+// Method (defaults to "GET")
+// UserAgent (defaults to defaultUserAgent)
+// Referer
+// Header
+// Body
+//
+// If Body is present, Write forces "Transfer-Encoding: chunked" as a header
+// and then closes Body when finished sending it.
+func (req *Request) Write(w io.Writer) os.Error {
+ host := req.Host
+ if host == "" {
+ host = req.URL.Host
+ }
+
+ uri := req.RawURL
+ if uri == "" {
+ uri = valueOrDefault(urlEscape(req.URL.Path, encodePath), "/")
+ if req.URL.RawQuery != "" {
+ uri += "?" + req.URL.RawQuery
+ }
+ }
+
+ fmt.Fprintf(w, "%s %s HTTP/1.1\r\n", valueOrDefault(req.Method, "GET"), uri)
+
+ // Header lines
+ fmt.Fprintf(w, "Host: %s\r\n", host)
+ fmt.Fprintf(w, "User-Agent: %s\r\n", valueOrDefault(req.UserAgent, defaultUserAgent))
+ if req.Referer != "" {
+ fmt.Fprintf(w, "Referer: %s\r\n", req.Referer)
+ }
+
+ // Process Body, ContentLength, Close, Trailer
+ tw, err := newTransferWriter(req)
+ if err != nil {
+ return err
+ }
+ err = tw.WriteHeader(w)
+ if err != nil {
+ return err
+ }
+
+ // TODO: split long values? (If so, should share code with Conn.Write)
+ // TODO: if Header includes values for Host, User-Agent, or Referer, this
+ // may conflict with the User-Agent or Referer headers we add manually.
+ // One solution would be to remove the Host, UserAgent, and Referer fields
+ // from Request, and introduce Request methods along the lines of
+ // Response.{GetHeader,AddHeader} and string constants for "Host",
+ // "User-Agent" and "Referer".
+ err = writeSortedKeyValue(w, req.Header, reqExcludeHeader)
+ if err != nil {
+ return err
+ }
+
+ io.WriteString(w, "\r\n")
+
+ // Write body and trailer
+ err = tw.WriteBody(w)
+ if err != nil {
+ return err
+ }
+
+ return nil
+}
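+
+// exampleWriteRequest is an illustrative sketch, not part of the original
+// source: it assembles a minimal GET request and writes its wire form into
+// a buffer. ParseURL is this package's own URL parser; the URL and header
+// values are made up for the example.
+func exampleWriteRequest() {
+ url, err := ParseURL("http://www.example.com/index.html?q=go")
+ if err != nil {
+ fmt.Println(err)
+ return
+ }
+ req := &Request{
+ Method: "GET",
+ URL: url,
+ Header: map[string]string{"Accept-Encoding": "gzip"},
+ }
+ var buf bytes.Buffer
+ if err := req.Write(&buf); err != nil {
+ fmt.Println(err)
+ return
+ }
+ fmt.Print(buf.String())
+}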
+
+// Read a line of bytes (up to \n) from b.
+// Give up if the line exceeds maxLineLength.
+// The returned bytes are a pointer into storage in
+// the bufio, so they are only valid until the next bufio read.
+func readLineBytes(b *bufio.Reader) (p []byte, err os.Error) {
+ if p, err = b.ReadSlice('\n'); err != nil {
+ // We always know when EOF is coming.
+ // If the caller asked for a line, there should be a line.
+ if err == os.EOF {
+ err = io.ErrUnexpectedEOF
+ } else if err == bufio.ErrBufferFull {
+ err = ErrLineTooLong
+ }
+ return nil, err
+ }
+ if len(p) >= maxLineLength {
+ return nil, ErrLineTooLong
+ }
+
+ // Chop off trailing white space.
+ var i int
+ for i = len(p); i > 0; i-- {
+ if c := p[i-1]; c != ' ' && c != '\r' && c != '\t' && c != '\n' {
+ break
+ }
+ }
+ return p[0:i], nil
+}
+
+// readLineBytes, but convert the bytes into a string.
+func readLine(b *bufio.Reader) (s string, err os.Error) {
+ p, e := readLineBytes(b)
+ if e != nil {
+ return "", e
+ }
+ return string(p), nil
+}
+
+var colon = []byte{':'}
+
+// Read a key/value pair from b.
+// A key/value has the form Key: Value\r\n
+// and the Value can continue on multiple lines if each continuation line
+// starts with a space.
+func readKeyValue(b *bufio.Reader) (key, value string, err os.Error) {
+ line, e := readLineBytes(b)
+ if e != nil {
+ return "", "", e
+ }
+ if len(line) == 0 {
+ return "", "", nil
+ }
+
+ // Scan first line for colon.
+ i := bytes.Index(line, colon)
+ if i < 0 {
+ goto Malformed
+ }
+
+ key = string(line[0:i])
+ if strings.Contains(key, " ") {
+ // Key field has space - no good.
+ goto Malformed
+ }
+
+ // Skip initial space before value.
+ for i++; i < len(line); i++ {
+ if line[i] != ' ' {
+ break
+ }
+ }
+ value = string(line[i:])
+
+ // Look for extension lines, which must begin with space.
+ for {
+ c, e := b.ReadByte()
+ if c != ' ' {
+ if e != os.EOF {
+ b.UnreadByte()
+ }
+ break
+ }
+
+ // Eat leading space.
+ for c == ' ' {
+ if c, e = b.ReadByte(); e != nil {
+ if e == os.EOF {
+ e = io.ErrUnexpectedEOF
+ }
+ return "", "", e
+ }
+ }
+ b.UnreadByte()
+
+ // Read the rest of the line and add to value.
+ if line, e = readLineBytes(b); e != nil {
+ return "", "", e
+ }
+ value += " " + string(line)
+
+ if len(value) >= maxValueLength {
+ return "", "", &badStringError{"value too long for key", key}
+ }
+ }
+ return key, value, nil
+
+Malformed:
+ return "", "", &badStringError{"malformed header line", string(line)}
+}
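+
+// exampleReadKeyValue is an illustrative sketch, not part of the original
+// source: it feeds readKeyValue a header whose value continues onto a
+// second line beginning with a space, as described above.
+func exampleReadKeyValue() {
+ hdr := "Accept: text/html,\r\n application/xml\r\n"
+ key, value, err := readKeyValue(bufio.NewReader(strings.NewReader(hdr)))
+ if err != nil {
+ fmt.Println(err)
+ return
+ }
+ fmt.Printf("%s: %s\n", key, value) // Accept: text/html, application/xml
+}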
+
+// Convert decimal at s[i:len(s)] to integer,
+// returning value, string position where the digits stopped,
+// and whether there was a valid number (digits, not too big).
+func atoi(s string, i int) (n, i1 int, ok bool) {
+ const Big = 1000000
+ if i >= len(s) || s[i] < '0' || s[i] > '9' {
+ return 0, 0, false
+ }
+ n = 0
+ for ; i < len(s) && '0' <= s[i] && s[i] <= '9'; i++ {
+ n = n*10 + int(s[i]-'0')
+ if n > Big {
+ return 0, 0, false
+ }
+ }
+ return n, i, true
+}
+
+// Parse HTTP version: "HTTP/1.2" -> (1, 2, true).
+func parseHTTPVersion(vers string) (int, int, bool) {
+ if len(vers) < 5 || vers[0:5] != "HTTP/" {
+ return 0, 0, false
+ }
+ major, i, ok := atoi(vers, 5)
+ if !ok || i >= len(vers) || vers[i] != '.' {
+ return 0, 0, false
+ }
+ var minor int
+ minor, i, ok = atoi(vers, i+1)
+ if !ok || i != len(vers) {
+ return 0, 0, false
+ }
+ return major, minor, true
+}
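+
+// exampleParseHTTPVersion is an illustrative sketch, not part of the
+// original source, of the parsing rule above.
+func exampleParseHTTPVersion() {
+ major, minor, ok := parseHTTPVersion("HTTP/1.1")
+ fmt.Println(major, minor, ok) // 1 1 true
+ _, _, ok = parseHTTPVersion("HTTP/1.")
+ fmt.Println(ok) // false: no digits after the dot
+}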
+
+// CanonicalHeaderKey returns the canonical format of the
+// HTTP header key s. The canonicalization converts the first
+// letter and any letter following a hyphen to upper case;
+// the rest are converted to lowercase. For example, the
+// canonical key for "accept-encoding" is "Accept-Encoding".
+func CanonicalHeaderKey(s string) string {
+ // canonicalize: first letter upper case
+ // and upper case after each dash.
+ // (Host, User-Agent, If-Modified-Since).
+ // HTTP headers are ASCII only, so no Unicode issues.
+ var a []byte
+ upper := true
+ for i := 0; i < len(s); i++ {
+ v := s[i]
+ if upper && 'a' <= v && v <= 'z' {
+ if a == nil {
+ a = []byte(s)
+ }
+ a[i] = v + 'A' - 'a'
+ }
+ if !upper && 'A' <= v && v <= 'Z' {
+ if a == nil {
+ a = []byte(s)
+ }
+ a[i] = v + 'a' - 'A'
+ }
+ upper = false
+ if v == '-' {
+ upper = true
+ }
+ }
+ if a != nil {
+ return string(a)
+ }
+ return s
+}
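+
+// exampleCanonicalHeaderKey is an illustrative sketch, not part of the
+// original source, of the canonicalization described above.
+func exampleCanonicalHeaderKey() {
+ fmt.Println(CanonicalHeaderKey("accept-encoding")) // Accept-Encoding
+ fmt.Println(CanonicalHeaderKey("USER-AGENT")) // User-Agent
+ fmt.Println(CanonicalHeaderKey("Host")) // Host (already canonical)
+}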
+
+type chunkedReader struct {
+ r *bufio.Reader
+ n uint64 // unread bytes in chunk
+ err os.Error
+}
+
+func newChunkedReader(r *bufio.Reader) *chunkedReader {
+ return &chunkedReader{r: r}
+}
+
+func (cr *chunkedReader) beginChunk() {
+ // chunk-size CRLF
+ var line string
+ line, cr.err = readLine(cr.r)
+ if cr.err != nil {
+ return
+ }
+ cr.n, cr.err = strconv.Btoui64(line, 16)
+ if cr.err != nil {
+ return
+ }
+ if cr.n == 0 {
+ // trailer CRLF
+ for {
+ line, cr.err = readLine(cr.r)
+ if cr.err != nil {
+ return
+ }
+ if line == "" {
+ break
+ }
+ }
+ cr.err = os.EOF
+ }
+}
+
+func (cr *chunkedReader) Read(b []uint8) (n int, err os.Error) {
+ if cr.err != nil {
+ return 0, cr.err
+ }
+ if cr.n == 0 {
+ cr.beginChunk()
+ if cr.err != nil {
+ return 0, cr.err
+ }
+ }
+ if uint64(len(b)) > cr.n {
+ b = b[0:cr.n]
+ }
+ n, cr.err = cr.r.Read(b)
+ cr.n -= uint64(n)
+ if cr.n == 0 && cr.err == nil {
+ // end of chunk (CRLF)
+ b := make([]byte, 2)
+ if _, cr.err = io.ReadFull(cr.r, b); cr.err == nil {
+ if b[0] != '\r' || b[1] != '\n' {
+ cr.err = os.NewError("malformed chunked encoding")
+ }
+ }
+ }
+ return n, cr.err
+}
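+
+// exampleChunkedRead is an illustrative sketch, not part of the original
+// source: it decodes a hand-written chunked body ("3\r\nfoo\r\n..."
+// terminated by a zero-length chunk) through newChunkedReader.
+func exampleChunkedRead() {
+ wire := "3\r\nfoo\r\n4\r\nbars\r\n0\r\n\r\n"
+ cr := newChunkedReader(bufio.NewReader(strings.NewReader(wire)))
+ body, err := ioutil.ReadAll(cr)
+ if err != nil && err != os.EOF {
+ fmt.Println(err)
+ return
+ }
+ fmt.Printf("%q\n", body) // "foobars"
+}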
+
+// ReadRequest reads and parses a request from b.
+func ReadRequest(b *bufio.Reader) (req *Request, err os.Error) {
+ req = new(Request)
+
+ // First line: GET /index.html HTTP/1.0
+ var s string
+ if s, err = readLine(b); err != nil {
+ return nil, err
+ }
+
+ var f []string
+ if f = strings.Split(s, " ", 3); len(f) < 3 {
+ return nil, &badStringError{"malformed HTTP request", s}
+ }
+ req.Method, req.RawURL, req.Proto = f[0], f[1], f[2]
+ var ok bool
+ if req.ProtoMajor, req.ProtoMinor, ok = parseHTTPVersion(req.Proto); !ok {
+ return nil, &badStringError{"malformed HTTP version", req.Proto}
+ }
+
+ if req.URL, err = ParseRequestURL(req.RawURL); err != nil {
+ return nil, err
+ }
+
+ // Subsequent lines: Key: value.
+ nheader := 0
+ req.Header = make(map[string]string)
+ for {
+ var key, value string
+ if key, value, err = readKeyValue(b); err != nil {
+ return nil, err
+ }
+ if key == "" {
+ break
+ }
+ if nheader++; nheader >= maxHeaderLines {
+ return nil, ErrHeaderTooLong
+ }
+
+ key = CanonicalHeaderKey(key)
+
+ // RFC 2616 says that if you send the same header key
+ // multiple times, it has to be semantically equivalent
+ // to concatenating the values separated by commas.
+ oldvalue, present := req.Header[key]
+ if present {
+ req.Header[key] = oldvalue + "," + value
+ } else {
+ req.Header[key] = value
+ }
+ }
+
+ // RFC2616: Must treat
+ // GET /index.html HTTP/1.1
+ // Host: www.google.com
+ // and
+ // GET http://www.google.com/index.html HTTP/1.1
+ // Host: doesntmatter
+ // the same. In the second case, any Host line is ignored.
+ req.Host = req.URL.Host
+ if req.Host == "" {
+ req.Host = req.Header["Host"]
+ }
+ req.Header["Host"] = "", false
+
+ fixPragmaCacheControl(req.Header)
+
+ // Pull out useful fields as a convenience to clients.
+ req.Referer = req.Header["Referer"]
+ req.Header["Referer"] = "", false
+
+ req.UserAgent = req.Header["User-Agent"]
+ req.Header["User-Agent"] = "", false
+
+ // TODO: Parse specific header values:
+ // Accept
+ // Accept-Encoding
+ // Accept-Language
+ // Authorization
+ // Cache-Control
+ // Connection
+ // Date
+ // Expect
+ // From
+ // If-Match
+ // If-Modified-Since
+ // If-None-Match
+ // If-Range
+ // If-Unmodified-Since
+ // Max-Forwards
+ // Proxy-Authorization
+ // Referer [sic]
+ // TE (transfer-codings)
+ // Trailer
+ // Transfer-Encoding
+ // Upgrade
+ // User-Agent
+ // Via
+ // Warning
+
+ err = readTransfer(req, b)
+ if err != nil {
+ return nil, err
+ }
+
+ return req, nil
+}
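+
+// exampleReadRequest is an illustrative sketch, not part of the original
+// source: it parses a raw request held in a string. The request text is
+// made up for the example.
+func exampleReadRequest() {
+ raw := "GET /index.html HTTP/1.1\r\n" +
+ "Host: www.example.com\r\n" +
+ "Accept-Encoding: gzip, deflate\r\n" +
+ "\r\n"
+ req, err := ReadRequest(bufio.NewReader(strings.NewReader(raw)))
+ if err != nil {
+ fmt.Println(err)
+ return
+ }
+ fmt.Println(req.Method, req.URL.Path, req.Host)
+ fmt.Println(req.Header["Accept-Encoding"])
+}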
+
+// ParseQuery parses the URL-encoded query string and returns
+// a map listing the values specified for each key.
+// ParseQuery always returns a non-nil map containing all the
+// valid query parameters found; err describes the first decoding error
+// encountered, if any.
+func ParseQuery(query string) (m map[string][]string, err os.Error) {
+ m = make(map[string][]string)
+ err = parseQuery(m, query)
+ return
+}
+
+func parseQuery(m map[string][]string, query string) (err os.Error) {
+ for _, kv := range strings.Split(query, "&", -1) {
+ if len(kv) == 0 {
+ continue
+ }
+ kvPair := strings.Split(kv, "=", 2)
+
+ var key, value string
+ var e os.Error
+ key, e = URLUnescape(kvPair[0])
+ if e == nil && len(kvPair) > 1 {
+ value, e = URLUnescape(kvPair[1])
+ }
+ if e != nil {
+ err = e
+ continue
+ }
+ vec := vector.StringVector(m[key])
+ vec.Push(value)
+ m[key] = vec
+ }
+ return err
+}
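+
+// exampleParseQuery is an illustrative sketch, not part of the original
+// source: repeated keys accumulate into the slice stored for that key.
+func exampleParseQuery() {
+ m, err := ParseQuery("name=Ava&tag=go&tag=http")
+ if err != nil {
+ fmt.Println(err)
+ return
+ }
+ fmt.Println(m["name"]) // [Ava]
+ fmt.Println(m["tag"]) // [go http]
+}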
+
+// ParseForm parses the URL's raw query and, for POST requests, the request body as a form.
+// It is idempotent.
+func (r *Request) ParseForm() (err os.Error) {
+ if r.Form != nil {
+ return
+ }
+
+ r.Form = make(map[string][]string)
+ if r.URL != nil {
+ err = parseQuery(r.Form, r.URL.RawQuery)
+ }
+ if r.Method == "POST" {
+ if r.Body == nil {
+ return os.ErrorString("missing form body")
+ }
+ ct := r.Header["Content-Type"]
+ switch strings.Split(ct, ";", 2)[0] {
+ case "text/plain", "application/x-www-form-urlencoded", "":
+ b, e := ioutil.ReadAll(r.Body)
+ if e != nil {
+ if err == nil {
+ err = e
+ }
+ break
+ }
+ e = parseQuery(r.Form, string(b))
+ if err == nil {
+ err = e
+ }
+ // TODO(dsymonds): Handle multipart/form-data
+ default:
+ return &badStringError{"unknown Content-Type", ct}
+ }
+ }
+ return err
+}
+
+// FormValue returns the first value for the named component of the query.
+// FormValue calls ParseForm if necessary.
+func (r *Request) FormValue(key string) string {
+ if r.Form == nil {
+ r.ParseForm()
+ }
+ if vs := r.Form[key]; len(vs) > 0 {
+ return vs[0]
+ }
+ return ""
+}
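+
+// exampleFormValue is an illustrative sketch, not part of the original
+// source: FormValue triggers ParseForm on first use, so both the URL query
+// and (for POST) the body are consulted. The request text is made up.
+func exampleFormValue() {
+ raw := "POST /submit?debug=1 HTTP/1.1\r\n" +
+ "Host: www.example.com\r\n" +
+ "Content-Type: application/x-www-form-urlencoded\r\n" +
+ "Content-Length: 13\r\n" +
+ "\r\n" +
+ "name=Ava&x=yz"
+ req, err := ReadRequest(bufio.NewReader(strings.NewReader(raw)))
+ if err != nil {
+ fmt.Println(err)
+ return
+ }
+ fmt.Println(req.FormValue("name")) // Ava
+ fmt.Println(req.FormValue("debug")) // 1
+}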
+
+func (r *Request) expectsContinue() bool {
+ expectation, ok := r.Header["Expect"]
+ return ok && strings.ToLower(expectation) == "100-continue"
+}
+
+func (r *Request) wantsHttp10KeepAlive() bool {
+ if r.ProtoMajor != 1 || r.ProtoMinor != 0 {
+ return false
+ }
+ value, exists := r.Header["Connection"]
+ if !exists {
+ return false
+ }
+ return strings.Contains(strings.ToLower(value), "keep-alive")
+}