diff --git a/Godeps/Godeps.json b/Godeps/Godeps.json deleted file mode 100644 index 962d98b..0000000 --- a/Godeps/Godeps.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "ImportPath": "github.com/jimeh/kotaku-uk-rss", - "GoVersion": "go1.3", - "Deps": [ - { - "ImportPath": "code.google.com/p/cascadia", - "Comment": "null-30", - "Rev": "4f03c71bc42ba0015a68bea86422f0ecbb71bf70" - }, - { - "ImportPath": "code.google.com/p/go.net/html", - "Comment": "null-144", - "Rev": "ad01a6fcc8a19d3a4478c836895ffe883bd2ceab" - }, - { - "ImportPath": "github.com/PuerkitoBio/goquery", - "Comment": "v0.3.2-27-g1e5417b", - "Rev": "1e5417b3dbc2ca68de909fb56d9095daa680a166" - }, - { - "ImportPath": "github.com/gorilla/feeds", - "Rev": "2e133eb352fab1ff3569ee169e9a3a94f69c9081" - } - ] -} diff --git a/Godeps/Readme b/Godeps/Readme deleted file mode 100644 index 4cdaa53..0000000 --- a/Godeps/Readme +++ /dev/null @@ -1,5 +0,0 @@ -This directory tree is generated automatically by godep. - -Please do not edit. - -See https://github.com/tools/godep for more information. diff --git a/Godeps/_workspace/.gitignore b/Godeps/_workspace/.gitignore deleted file mode 100644 index f037d68..0000000 --- a/Godeps/_workspace/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/pkg -/bin diff --git a/Godeps/_workspace/src/code.google.com/p/cascadia/.hgignore b/Godeps/_workspace/src/code.google.com/p/cascadia/.hgignore deleted file mode 100644 index f97c559..0000000 --- a/Godeps/_workspace/src/code.google.com/p/cascadia/.hgignore +++ /dev/null @@ -1,6 +0,0 @@ -^_ -^\. -\.out$ -\.6$ -~$ -\.orig$ diff --git a/Godeps/_workspace/src/code.google.com/p/cascadia/Makefile b/Godeps/_workspace/src/code.google.com/p/cascadia/Makefile deleted file mode 100644 index c7d7a2b..0000000 --- a/Godeps/_workspace/src/code.google.com/p/cascadia/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -include $(GOROOT)/src/Make.inc - -TARG=cascadia - -GOFILES= \ - parser.go \ - selector.go \ - -include $(GOROOT)/src/Make.pkg - -format: - gofmt -w ${GOFILES} *_test.go diff --git a/Godeps/_workspace/src/code.google.com/p/cascadia/benchmark_test.go b/Godeps/_workspace/src/code.google.com/p/cascadia/benchmark_test.go deleted file mode 100644 index cdd6eb2..0000000 --- a/Godeps/_workspace/src/code.google.com/p/cascadia/benchmark_test.go +++ /dev/null @@ -1,52 +0,0 @@ -package cascadia - -import ( - "code.google.com/p/go.net/html" - "strings" - "testing" -) - -func MustParseHTML(doc string) *html.Node { - dom, err := html.Parse(strings.NewReader(doc)) - if err != nil { - panic(err) - } - return dom -} - -var selector = MustCompile(`div.matched`) -var doc = ` - -
-`, - "#foo", - []string{ - `
`, - }, - }, - { - `
`, - "li#t1", - []string{ - `
`, - "p.t1", - []string{ - `
`, - }, - }, - { - `
`, - ".t1.fail", - []string{}, - }, - { - `
`, - "p.t1.t2", - []string{ - `
`, - }, - }, - { - `
`, - "p[title]", - []string{ - `
`, - }, - }, - { - `
`, - `address[title="foo"]`, - []string{ - ``, - }, - }, - { - ``, - `[ title ~= foo ]`, - []string{ - `
`, - }, - }, - { - `
`, - `[title~="hello world"]`, - []string{}, - }, - { - `
`, - `[lang|="en"]`, - []string{ - `
`, - `
`, - }, - }, - { - `
`, - `[title^="foo"]`, - []string{ - `
`, - }, - }, - { - `
`, - `[title$="bar"]`, - []string{ - `
`, - }, - }, - { - `
`, - `[title*="bar"]`, - []string{ - `
`, - }, - }, - { - `
`, - ".t1:not(.t2)", - []string{}, - }, - { - `
some text and a span and another
`, - `span:first-child`, - []string{ - ``, - }, - }, - { - `a span and some text`, - `span:last-child`, - []string{ - ``, - }, - }, - { - ``, - `p:nth-of-type(2)`, - []string{ - `
`, - }, - }, - { - `
`, - `p:nth-last-of-type(2)`, - []string{ - ``, - }, - }, - { - `
`, - `p:last-of-type`, - []string{ - ``, - }, - }, - { - `
`, - `p:first-of-type`, - []string{ - ``, - }, - }, - { - `
`, - }, - }, - { - `
`, - }, - }, - { - `
Hello
`,
- `:empty`,
- []string{
- ``,
- ` `,
- ``,
- },
- },
- {
- ` `,
- `div p`,
- []string{
- ` `,
- ` `,
- },
- },
- {
- ` `,
- `div table p`,
- []string{
- ` `,
- },
- },
- {
- ` `,
- ` `,
- },
- },
- {
- ` `,
- `p ~ p`,
- []string{
- ` `,
- ` `,
- },
- },
- {
- ` `,
- `p + p`,
- []string{
- ` `,
- },
- },
- {
- ` `,
- `li, p`,
- []string{
- " ",
- },
- },
- {
- ` `,
- `p +/*This is a comment*/ p`,
- []string{
- ` `,
- },
- },
- {
- ` Text block that wraps inner text and continues `,
- },
- },
- {
- ` Text block that wraps inner text and continues Text block that wraps inner text and continues Text block that wraps inner text and continues `,
- },
- },
- {
- ` text content contents 1 contents 2 contents 1 contents 2 `,
- },
- },
- {
- ` contents 1 contents 2 `,
- },
- },
- {
- ` 0123456789 abcdef 0123ABCD `,
- ` `,
- },
- },
- {
- ` 0123456789 abcdef 0123ABCD `,
- },
- },
- {
- ` 0123456789 abcdef 0123ABCD `,
- ` `,
- },
- },
- {
- ` 0123456789 abcdef 0123ABCD `,
- ` `,
- },
- },
- {
- ` 0123456789 abcdef 0123ABCD `,
- ` `,
- ` `,
- },
- },
- {
- ` 0123456789 abcdef 0123ABCD `,
- },
- },
- {
- ` 0123456789 abcdef 0123ABCD `,
- ` `,
- },
- },
- {
- ` 0123456789 `,
- ``,
- },
- },
- {
- ``,
- `[href#=(fina)]:not([href#=(\/\/[^\/]+untrusted)])`,
- []string{
- ``,
- ``,
- },
- },
- {
- ``,
- `[href#=(^https:\/\/[^\/]*\/?news)]`,
- []string{
- ``,
- },
- },
-}
-
-func TestSelectors(t *testing.T) {
- for _, test := range selectorTests {
- s, err := Compile(test.selector)
- if err != nil {
- t.Errorf("error compiling %q: %s", test.selector, err)
- continue
- }
-
- doc, err := html.Parse(strings.NewReader(test.HTML))
- if err != nil {
- t.Errorf("error parsing %q: %s", test.HTML, err)
- continue
- }
-
- matches := s.MatchAll(doc)
- if len(matches) != len(test.results) {
- t.Errorf("wanted %d elements, got %d instead", len(test.results), len(matches))
- continue
- }
-
- for i, m := range matches {
- got := nodeString(m)
- if got != test.results[i] {
- t.Errorf("wanted %s, got %s instead", test.results[i], got)
- }
- }
-
- firstMatch := s.MatchFirst(doc)
- if len(test.results) == 0 {
- if firstMatch != nil {
- t.Errorf("MatchFirst: want nil, got %s", nodeString(firstMatch))
- }
- } else {
- got := nodeString(firstMatch)
- if got != test.results[0] {
- t.Errorf("MatchFirst: want %s, got %s", test.results[0], got)
- }
- }
- }
-}
diff --git a/Godeps/_workspace/src/code.google.com/p/go.net/html/atom/atom_test.go b/Godeps/_workspace/src/code.google.com/p/go.net/html/atom/atom_test.go
deleted file mode 100644
index 6e33704..0000000
--- a/Godeps/_workspace/src/code.google.com/p/go.net/html/atom/atom_test.go
+++ /dev/null
@@ -1,109 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package atom
-
-import (
- "sort"
- "testing"
-)
-
-func TestKnown(t *testing.T) {
- for _, s := range testAtomList {
- if atom := Lookup([]byte(s)); atom.String() != s {
- t.Errorf("Lookup(%q) = %#x (%q)", s, uint32(atom), atom.String())
- }
- }
-}
-
-func TestHits(t *testing.T) {
- for _, a := range table {
- if a == 0 {
- continue
- }
- got := Lookup([]byte(a.String()))
- if got != a {
- t.Errorf("Lookup(%q) = %#x, want %#x", a.String(), uint32(got), uint32(a))
- }
- }
-}
-
-func TestMisses(t *testing.T) {
- testCases := []string{
- "",
- "\x00",
- "\xff",
- "A",
- "DIV",
- "Div",
- "dIV",
- "aa",
- "a\x00",
- "ab",
- "abb",
- "abbr0",
- "abbr ",
- " abbr",
- " a",
- "acceptcharset",
- "acceptCharset",
- "accept_charset",
- "h0",
- "h1h2",
- "h7",
- "onClick",
- "λ",
- // The following string has the same hash (0xa1d7fab7) as "onmouseover".
- "\x00\x00\x00\x00\x00\x50\x18\xae\x38\xd0\xb7",
- }
- for _, tc := range testCases {
- got := Lookup([]byte(tc))
- if got != 0 {
- t.Errorf("Lookup(%q): got %d, want 0", tc, got)
- }
- }
-}
-
-func TestForeignObject(t *testing.T) {
- const (
- afo = Foreignobject
- afO = ForeignObject
- sfo = "foreignobject"
- sfO = "foreignObject"
- )
- if got := Lookup([]byte(sfo)); got != afo {
- t.Errorf("Lookup(%q): got %#v, want %#v", sfo, got, afo)
- }
- if got := Lookup([]byte(sfO)); got != afO {
- t.Errorf("Lookup(%q): got %#v, want %#v", sfO, got, afO)
- }
- if got := afo.String(); got != sfo {
- t.Errorf("Atom(%#v).String(): got %q, want %q", afo, got, sfo)
- }
- if got := afO.String(); got != sfO {
- t.Errorf("Atom(%#v).String(): got %q, want %q", afO, got, sfO)
- }
-}
-
-func BenchmarkLookup(b *testing.B) {
- sortedTable := make([]string, 0, len(table))
- for _, a := range table {
- if a != 0 {
- sortedTable = append(sortedTable, a.String())
- }
- }
- sort.Strings(sortedTable)
-
- x := make([][]byte, 1000)
- for i := range x {
- x[i] = []byte(sortedTable[i%len(sortedTable)])
- }
-
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- for _, s := range x {
- Lookup(s)
- }
- }
-}
diff --git a/Godeps/_workspace/src/code.google.com/p/go.net/html/atom/table.go b/Godeps/_workspace/src/code.google.com/p/go.net/html/atom/table.go
deleted file mode 100644
index 20b8b8a..0000000
--- a/Godeps/_workspace/src/code.google.com/p/go.net/html/atom/table.go
+++ /dev/null
@@ -1,694 +0,0 @@
-// generated by go run gen.go; DO NOT EDIT
-
-package atom
-
-const (
- A Atom = 0x1
- Abbr Atom = 0x4
- Accept Atom = 0x2106
- AcceptCharset Atom = 0x210e
- Accesskey Atom = 0x3309
- Action Atom = 0x21b06
- Address Atom = 0x5d507
- Align Atom = 0x1105
- Alt Atom = 0x4503
- Annotation Atom = 0x18d0a
- AnnotationXml Atom = 0x18d0e
- Applet Atom = 0x2d106
- Area Atom = 0x31804
- Article Atom = 0x39907
- Aside Atom = 0x4f05
- Async Atom = 0x9305
- Audio Atom = 0xaf05
- Autocomplete Atom = 0xd50c
- Autofocus Atom = 0xe109
- Autoplay Atom = 0x10c08
- B Atom = 0x101
- Base Atom = 0x11404
- Basefont Atom = 0x11408
- Bdi Atom = 0x1a03
- Bdo Atom = 0x12503
- Bgsound Atom = 0x13807
- Big Atom = 0x14403
- Blink Atom = 0x14705
- Blockquote Atom = 0x14c0a
- Body Atom = 0x2f04
- Border Atom = 0x15606
- Br Atom = 0x202
- Button Atom = 0x15c06
- Canvas Atom = 0x4b06
- Caption Atom = 0x1e007
- Center Atom = 0x2df06
- Challenge Atom = 0x23e09
- Charset Atom = 0x2807
- Checked Atom = 0x33f07
- Cite Atom = 0x9704
- Class Atom = 0x3d905
- Code Atom = 0x16f04
- Col Atom = 0x17603
- Colgroup Atom = 0x17608
- Color Atom = 0x18305
- Cols Atom = 0x18804
- Colspan Atom = 0x18807
- Command Atom = 0x19b07
- Content Atom = 0x42c07
- Contenteditable Atom = 0x42c0f
- Contextmenu Atom = 0x3480b
- Controls Atom = 0x1ae08
- Coords Atom = 0x1ba06
- Crossorigin Atom = 0x1c40b
- Data Atom = 0x44304
- Datalist Atom = 0x44308
- Datetime Atom = 0x25b08
- Dd Atom = 0x28802
- Default Atom = 0x5207
- Defer Atom = 0x17105
- Del Atom = 0x4d603
- Desc Atom = 0x4804
- Details Atom = 0x6507
- Dfn Atom = 0x8303
- Dialog Atom = 0x1b06
- Dir Atom = 0x9d03
- Dirname Atom = 0x9d07
- Disabled Atom = 0x10008
- Div Atom = 0x10703
- Dl Atom = 0x13e02
- Download Atom = 0x40908
- Draggable Atom = 0x1a109
- Dropzone Atom = 0x3a208
- Dt Atom = 0x4e402
- Em Atom = 0x7f02
- Embed Atom = 0x7f05
- Enctype Atom = 0x23007
- Face Atom = 0x2dd04
- Fieldset Atom = 0x1d508
- Figcaption Atom = 0x1dd0a
- Figure Atom = 0x1f106
- Font Atom = 0x11804
- Footer Atom = 0x5906
- For Atom = 0x1fd03
- ForeignObject Atom = 0x1fd0d
- Foreignobject Atom = 0x20a0d
- Form Atom = 0x21704
- Formaction Atom = 0x2170a
- Formenctype Atom = 0x22c0b
- Formmethod Atom = 0x2470a
- Formnovalidate Atom = 0x2510e
- Formtarget Atom = 0x2660a
- Frame Atom = 0x8705
- Frameset Atom = 0x8708
- H1 Atom = 0x13602
- H2 Atom = 0x29602
- H3 Atom = 0x2c502
- H4 Atom = 0x30e02
- H5 Atom = 0x4e602
- H6 Atom = 0x27002
- Head Atom = 0x2fa04
- Header Atom = 0x2fa06
- Headers Atom = 0x2fa07
- Height Atom = 0x27206
- Hgroup Atom = 0x27a06
- Hidden Atom = 0x28606
- High Atom = 0x29304
- Hr Atom = 0x13102
- Href Atom = 0x29804
- Hreflang Atom = 0x29808
- Html Atom = 0x27604
- HttpEquiv Atom = 0x2a00a
- I Atom = 0x601
- Icon Atom = 0x42b04
- Id Atom = 0x5102
- Iframe Atom = 0x2b406
- Image Atom = 0x2ba05
- Img Atom = 0x2bf03
- Inert Atom = 0x4c105
- Input Atom = 0x3f605
- Ins Atom = 0x1cd03
- Isindex Atom = 0x2c707
- Ismap Atom = 0x2ce05
- Itemid Atom = 0x9806
- Itemprop Atom = 0x57e08
- Itemref Atom = 0x2d707
- Itemscope Atom = 0x2e509
- Itemtype Atom = 0x2ef08
- Kbd Atom = 0x1903
- Keygen Atom = 0x3906
- Keytype Atom = 0x51207
- Kind Atom = 0xfd04
- Label Atom = 0xba05
- Lang Atom = 0x29c04
- Legend Atom = 0x1a806
- Li Atom = 0x1202
- Link Atom = 0x14804
- List Atom = 0x44704
- Listing Atom = 0x44707
- Loop Atom = 0xbe04
- Low Atom = 0x13f03
- Malignmark Atom = 0x100a
- Manifest Atom = 0x5b608
- Map Atom = 0x2d003
- Mark Atom = 0x1604
- Marquee Atom = 0x5f207
- Math Atom = 0x2f704
- Max Atom = 0x30603
- Maxlength Atom = 0x30609
- Media Atom = 0xa205
- Mediagroup Atom = 0xa20a
- Menu Atom = 0x34f04
- Meta Atom = 0x45604
- Meter Atom = 0x26105
- Method Atom = 0x24b06
- Mglyph Atom = 0x2c006
- Mi Atom = 0x9b02
- Min Atom = 0x31003
- Mn Atom = 0x25402
- Mo Atom = 0x47a02
- Ms Atom = 0x2e802
- Mtext Atom = 0x31305
- Multiple Atom = 0x32108
- Muted Atom = 0x32905
- Name Atom = 0xa004
- Nav Atom = 0x3e03
- Nobr Atom = 0x7404
- Noembed Atom = 0x7d07
- Noframes Atom = 0x8508
- Noscript Atom = 0x28b08
- Novalidate Atom = 0x2550a
- Object Atom = 0x21106
- Ol Atom = 0xcd02
- Onabort Atom = 0x16007
- Onafterprint Atom = 0x1e50c
- Onbeforeprint Atom = 0x21f0d
- Onbeforeunload Atom = 0x5c90e
- Onblur Atom = 0x3e206
- Oncancel Atom = 0xb308
- Oncanplay Atom = 0x12709
- Oncanplaythrough Atom = 0x12710
- Onchange Atom = 0x3b808
- Onclick Atom = 0x2ad07
- Onclose Atom = 0x32e07
- Oncontextmenu Atom = 0x3460d
- Oncuechange Atom = 0x3530b
- Ondblclick Atom = 0x35e0a
- Ondrag Atom = 0x36806
- Ondragend Atom = 0x36809
- Ondragenter Atom = 0x3710b
- Ondragleave Atom = 0x37c0b
- Ondragover Atom = 0x3870a
- Ondragstart Atom = 0x3910b
- Ondrop Atom = 0x3a006
- Ondurationchange Atom = 0x3b010
- Onemptied Atom = 0x3a709
- Onended Atom = 0x3c007
- Onerror Atom = 0x3c707
- Onfocus Atom = 0x3ce07
- Onhashchange Atom = 0x3e80c
- Oninput Atom = 0x3f407
- Oninvalid Atom = 0x3fb09
- Onkeydown Atom = 0x40409
- Onkeypress Atom = 0x4110a
- Onkeyup Atom = 0x42107
- Onload Atom = 0x43b06
- Onloadeddata Atom = 0x43b0c
- Onloadedmetadata Atom = 0x44e10
- Onloadstart Atom = 0x4640b
- Onmessage Atom = 0x46f09
- Onmousedown Atom = 0x4780b
- Onmousemove Atom = 0x4830b
- Onmouseout Atom = 0x48e0a
- Onmouseover Atom = 0x49b0b
- Onmouseup Atom = 0x4a609
- Onmousewheel Atom = 0x4af0c
- Onoffline Atom = 0x4bb09
- Ononline Atom = 0x4c608
- Onpagehide Atom = 0x4ce0a
- Onpageshow Atom = 0x4d90a
- Onpause Atom = 0x4e807
- Onplay Atom = 0x4f206
- Onplaying Atom = 0x4f209
- Onpopstate Atom = 0x4fb0a
- Onprogress Atom = 0x5050a
- Onratechange Atom = 0x5190c
- Onreset Atom = 0x52507
- Onresize Atom = 0x52c08
- Onscroll Atom = 0x53a08
- Onseeked Atom = 0x54208
- Onseeking Atom = 0x54a09
- Onselect Atom = 0x55308
- Onshow Atom = 0x55d06
- Onstalled Atom = 0x56609
- Onstorage Atom = 0x56f09
- Onsubmit Atom = 0x57808
- Onsuspend Atom = 0x58809
- Ontimeupdate Atom = 0x1190c
- Onunload Atom = 0x59108
- Onvolumechange Atom = 0x5990e
- Onwaiting Atom = 0x5a709
- Open Atom = 0x58404
- Optgroup Atom = 0xc008
- Optimum Atom = 0x5b007
- Option Atom = 0x5c506
- Output Atom = 0x49506
- P Atom = 0xc01
- Param Atom = 0xc05
- Pattern Atom = 0x6e07
- Ping Atom = 0xab04
- Placeholder Atom = 0xc70b
- Plaintext Atom = 0xf109
- Poster Atom = 0x17d06
- Pre Atom = 0x27f03
- Preload Atom = 0x27f07
- Progress Atom = 0x50708
- Prompt Atom = 0x5bf06
- Public Atom = 0x42706
- Q Atom = 0x15101
- Radiogroup Atom = 0x30a
- Readonly Atom = 0x31908
- Rel Atom = 0x28003
- Required Atom = 0x1f508
- Reversed Atom = 0x5e08
- Rows Atom = 0x7704
- Rowspan Atom = 0x7707
- Rp Atom = 0x1eb02
- Rt Atom = 0x16502
- Ruby Atom = 0xd104
- S Atom = 0x2c01
- Samp Atom = 0x6b04
- Sandbox Atom = 0xe907
- Scope Atom = 0x2e905
- Scoped Atom = 0x2e906
- Script Atom = 0x28d06
- Seamless Atom = 0x33308
- Section Atom = 0x3dd07
- Select Atom = 0x55506
- Selected Atom = 0x55508
- Shape Atom = 0x1b505
- Size Atom = 0x53004
- Sizes Atom = 0x53005
- Small Atom = 0x1bf05
- Source Atom = 0x1cf06
- Spacer Atom = 0x30006
- Span Atom = 0x7a04
- Spellcheck Atom = 0x33a0a
- Src Atom = 0x3d403
- Srcdoc Atom = 0x3d406
- Srclang Atom = 0x41a07
- Start Atom = 0x39705
- Step Atom = 0x5bc04
- Strike Atom = 0x50e06
- Strong Atom = 0x53406
- Style Atom = 0x5db05
- Sub Atom = 0x57a03
- Summary Atom = 0x5e007
- Sup Atom = 0x5e703
- Svg Atom = 0x5ea03
- System Atom = 0x5ed06
- Tabindex Atom = 0x45c08
- Table Atom = 0x43605
- Target Atom = 0x26a06
- Tbody Atom = 0x2e05
- Td Atom = 0x4702
- Textarea Atom = 0x31408
- Tfoot Atom = 0x5805
- Th Atom = 0x13002
- Thead Atom = 0x2f905
- Time Atom = 0x11b04
- Title Atom = 0x8e05
- Tr Atom = 0xf902
- Track Atom = 0xf905
- Translate Atom = 0x16609
- Tt Atom = 0x7002
- Type Atom = 0x23304
- Typemustmatch Atom = 0x2330d
- U Atom = 0xb01
- Ul Atom = 0x5602
- Usemap Atom = 0x4ec06
- Value Atom = 0x4005
- Var Atom = 0x10903
- Video Atom = 0x2a905
- Wbr Atom = 0x14103
- Width Atom = 0x4e205
- Wrap Atom = 0x56204
- Xmp Atom = 0xef03
-)
-
-const hash0 = 0xc17da63e
-
-const maxAtomLen = 16
-
-var table = [1 << 9]Atom{
- 0x1: 0x4830b, // onmousemove
- 0x2: 0x5a709, // onwaiting
- 0x4: 0x5bf06, // prompt
- 0x7: 0x5b007, // optimum
- 0x8: 0x1604, // mark
- 0xa: 0x2d707, // itemref
- 0xb: 0x4d90a, // onpageshow
- 0xc: 0x55506, // select
- 0xd: 0x1a109, // draggable
- 0xe: 0x3e03, // nav
- 0xf: 0x19b07, // command
- 0x11: 0xb01, // u
- 0x14: 0x2fa07, // headers
- 0x15: 0x44308, // datalist
- 0x17: 0x6b04, // samp
- 0x1a: 0x40409, // onkeydown
- 0x1b: 0x53a08, // onscroll
- 0x1c: 0x17603, // col
- 0x20: 0x57e08, // itemprop
- 0x21: 0x2a00a, // http-equiv
- 0x22: 0x5e703, // sup
- 0x24: 0x1f508, // required
- 0x2b: 0x27f07, // preload
- 0x2c: 0x21f0d, // onbeforeprint
- 0x2d: 0x3710b, // ondragenter
- 0x2e: 0x4e402, // dt
- 0x2f: 0x57808, // onsubmit
- 0x30: 0x13102, // hr
- 0x31: 0x3460d, // oncontextmenu
- 0x33: 0x2ba05, // image
- 0x34: 0x4e807, // onpause
- 0x35: 0x27a06, // hgroup
- 0x36: 0xab04, // ping
- 0x37: 0x55308, // onselect
- 0x3a: 0x10703, // div
- 0x40: 0x9b02, // mi
- 0x41: 0x33308, // seamless
- 0x42: 0x2807, // charset
- 0x43: 0x5102, // id
- 0x44: 0x4fb0a, // onpopstate
- 0x45: 0x4d603, // del
- 0x46: 0x5f207, // marquee
- 0x47: 0x3309, // accesskey
- 0x49: 0x5906, // footer
- 0x4a: 0x2d106, // applet
- 0x4b: 0x2ce05, // ismap
- 0x51: 0x34f04, // menu
- 0x52: 0x2f04, // body
- 0x55: 0x8708, // frameset
- 0x56: 0x52507, // onreset
- 0x57: 0x14705, // blink
- 0x58: 0x8e05, // title
- 0x59: 0x39907, // article
- 0x5b: 0x13002, // th
- 0x5d: 0x15101, // q
- 0x5e: 0x58404, // open
- 0x5f: 0x31804, // area
- 0x61: 0x43b06, // onload
- 0x62: 0x3f605, // input
- 0x63: 0x11404, // base
- 0x64: 0x18807, // colspan
- 0x65: 0x51207, // keytype
- 0x66: 0x13e02, // dl
- 0x68: 0x1d508, // fieldset
- 0x6a: 0x31003, // min
- 0x6b: 0x10903, // var
- 0x6f: 0x2fa06, // header
- 0x70: 0x16502, // rt
- 0x71: 0x17608, // colgroup
- 0x72: 0x25402, // mn
- 0x74: 0x16007, // onabort
- 0x75: 0x3906, // keygen
- 0x76: 0x4bb09, // onoffline
- 0x77: 0x23e09, // challenge
- 0x78: 0x2d003, // map
- 0x7a: 0x30e02, // h4
- 0x7b: 0x3c707, // onerror
- 0x7c: 0x30609, // maxlength
- 0x7d: 0x31305, // mtext
- 0x7e: 0x5805, // tfoot
- 0x7f: 0x11804, // font
- 0x80: 0x100a, // malignmark
- 0x81: 0x45604, // meta
- 0x82: 0x9305, // async
- 0x83: 0x2c502, // h3
- 0x84: 0x28802, // dd
- 0x85: 0x29804, // href
- 0x86: 0xa20a, // mediagroup
- 0x87: 0x1ba06, // coords
- 0x88: 0x41a07, // srclang
- 0x89: 0x35e0a, // ondblclick
- 0x8a: 0x4005, // value
- 0x8c: 0xb308, // oncancel
- 0x8e: 0x33a0a, // spellcheck
- 0x8f: 0x8705, // frame
- 0x91: 0x14403, // big
- 0x94: 0x21b06, // action
- 0x95: 0x9d03, // dir
- 0x97: 0x31908, // readonly
- 0x99: 0x43605, // table
- 0x9a: 0x5e007, // summary
- 0x9b: 0x14103, // wbr
- 0x9c: 0x30a, // radiogroup
- 0x9d: 0xa004, // name
- 0x9f: 0x5ed06, // system
- 0xa1: 0x18305, // color
- 0xa2: 0x4b06, // canvas
- 0xa3: 0x27604, // html
- 0xa5: 0x54a09, // onseeking
- 0xac: 0x1b505, // shape
- 0xad: 0x28003, // rel
- 0xae: 0x12710, // oncanplaythrough
- 0xaf: 0x3870a, // ondragover
- 0xb1: 0x1fd0d, // foreignObject
- 0xb3: 0x7704, // rows
- 0xb6: 0x44707, // listing
- 0xb7: 0x49506, // output
- 0xb9: 0x3480b, // contextmenu
- 0xbb: 0x13f03, // low
- 0xbc: 0x1eb02, // rp
- 0xbd: 0x58809, // onsuspend
- 0xbe: 0x15c06, // button
- 0xbf: 0x4804, // desc
- 0xc1: 0x3dd07, // section
- 0xc2: 0x5050a, // onprogress
- 0xc3: 0x56f09, // onstorage
- 0xc4: 0x2f704, // math
- 0xc5: 0x4f206, // onplay
- 0xc7: 0x5602, // ul
- 0xc8: 0x6e07, // pattern
- 0xc9: 0x4af0c, // onmousewheel
- 0xca: 0x36809, // ondragend
- 0xcb: 0xd104, // ruby
- 0xcc: 0xc01, // p
- 0xcd: 0x32e07, // onclose
- 0xce: 0x26105, // meter
- 0xcf: 0x13807, // bgsound
- 0xd2: 0x27206, // height
- 0xd4: 0x101, // b
- 0xd5: 0x2ef08, // itemtype
- 0xd8: 0x1e007, // caption
- 0xd9: 0x10008, // disabled
- 0xdc: 0x5ea03, // svg
- 0xdd: 0x1bf05, // small
- 0xde: 0x44304, // data
- 0xe0: 0x4c608, // ononline
- 0xe1: 0x2c006, // mglyph
- 0xe3: 0x7f05, // embed
- 0xe4: 0xf902, // tr
- 0xe5: 0x4640b, // onloadstart
- 0xe7: 0x3b010, // ondurationchange
- 0xed: 0x12503, // bdo
- 0xee: 0x4702, // td
- 0xef: 0x4f05, // aside
- 0xf0: 0x29602, // h2
- 0xf1: 0x50708, // progress
- 0xf2: 0x14c0a, // blockquote
- 0xf4: 0xba05, // label
- 0xf5: 0x601, // i
- 0xf7: 0x7707, // rowspan
- 0xfb: 0x4f209, // onplaying
- 0xfd: 0x2bf03, // img
- 0xfe: 0xc008, // optgroup
- 0xff: 0x42c07, // content
- 0x101: 0x5190c, // onratechange
- 0x103: 0x3e80c, // onhashchange
- 0x104: 0x6507, // details
- 0x106: 0x40908, // download
- 0x109: 0xe907, // sandbox
- 0x10b: 0x42c0f, // contenteditable
- 0x10d: 0x37c0b, // ondragleave
- 0x10e: 0x2106, // accept
- 0x10f: 0x55508, // selected
- 0x112: 0x2170a, // formaction
- 0x113: 0x2df06, // center
- 0x115: 0x44e10, // onloadedmetadata
- 0x116: 0x14804, // link
- 0x117: 0x11b04, // time
- 0x118: 0x1c40b, // crossorigin
- 0x119: 0x3ce07, // onfocus
- 0x11a: 0x56204, // wrap
- 0x11b: 0x42b04, // icon
- 0x11d: 0x2a905, // video
- 0x11e: 0x3d905, // class
- 0x121: 0x5990e, // onvolumechange
- 0x122: 0x3e206, // onblur
- 0x123: 0x2e509, // itemscope
- 0x124: 0x5db05, // style
- 0x127: 0x42706, // public
- 0x129: 0x2510e, // formnovalidate
- 0x12a: 0x55d06, // onshow
- 0x12c: 0x16609, // translate
- 0x12d: 0x9704, // cite
- 0x12e: 0x2e802, // ms
- 0x12f: 0x1190c, // ontimeupdate
- 0x130: 0xfd04, // kind
- 0x131: 0x2660a, // formtarget
- 0x135: 0x3c007, // onended
- 0x136: 0x28606, // hidden
- 0x137: 0x2c01, // s
- 0x139: 0x2470a, // formmethod
- 0x13a: 0x44704, // list
- 0x13c: 0x27002, // h6
- 0x13d: 0xcd02, // ol
- 0x13e: 0x3530b, // oncuechange
- 0x13f: 0x20a0d, // foreignobject
- 0x143: 0x5c90e, // onbeforeunload
- 0x145: 0x3a709, // onemptied
- 0x146: 0x17105, // defer
- 0x147: 0xef03, // xmp
- 0x148: 0xaf05, // audio
- 0x149: 0x1903, // kbd
- 0x14c: 0x46f09, // onmessage
- 0x14d: 0x5c506, // option
- 0x14e: 0x4503, // alt
- 0x14f: 0x33f07, // checked
- 0x150: 0x10c08, // autoplay
- 0x152: 0x202, // br
- 0x153: 0x2550a, // novalidate
- 0x156: 0x7d07, // noembed
- 0x159: 0x2ad07, // onclick
- 0x15a: 0x4780b, // onmousedown
- 0x15b: 0x3b808, // onchange
- 0x15e: 0x3fb09, // oninvalid
- 0x15f: 0x2e906, // scoped
- 0x160: 0x1ae08, // controls
- 0x161: 0x32905, // muted
- 0x163: 0x4ec06, // usemap
- 0x164: 0x1dd0a, // figcaption
- 0x165: 0x36806, // ondrag
- 0x166: 0x29304, // high
- 0x168: 0x3d403, // src
- 0x169: 0x17d06, // poster
- 0x16b: 0x18d0e, // annotation-xml
- 0x16c: 0x5bc04, // step
- 0x16d: 0x4, // abbr
- 0x16e: 0x1b06, // dialog
- 0x170: 0x1202, // li
- 0x172: 0x47a02, // mo
- 0x175: 0x1fd03, // for
- 0x176: 0x1cd03, // ins
- 0x178: 0x53004, // size
- 0x17a: 0x5207, // default
- 0x17b: 0x1a03, // bdi
- 0x17c: 0x4ce0a, // onpagehide
- 0x17d: 0x9d07, // dirname
- 0x17e: 0x23304, // type
- 0x17f: 0x21704, // form
- 0x180: 0x4c105, // inert
- 0x181: 0x12709, // oncanplay
- 0x182: 0x8303, // dfn
- 0x183: 0x45c08, // tabindex
- 0x186: 0x7f02, // em
- 0x187: 0x29c04, // lang
- 0x189: 0x3a208, // dropzone
- 0x18a: 0x4110a, // onkeypress
- 0x18b: 0x25b08, // datetime
- 0x18c: 0x18804, // cols
- 0x18d: 0x1, // a
- 0x18e: 0x43b0c, // onloadeddata
- 0x191: 0x15606, // border
- 0x192: 0x2e05, // tbody
- 0x193: 0x24b06, // method
- 0x195: 0xbe04, // loop
- 0x196: 0x2b406, // iframe
- 0x198: 0x2fa04, // head
- 0x19e: 0x5b608, // manifest
- 0x19f: 0xe109, // autofocus
- 0x1a0: 0x16f04, // code
- 0x1a1: 0x53406, // strong
- 0x1a2: 0x32108, // multiple
- 0x1a3: 0xc05, // param
- 0x1a6: 0x23007, // enctype
- 0x1a7: 0x2dd04, // face
- 0x1a8: 0xf109, // plaintext
- 0x1a9: 0x13602, // h1
- 0x1aa: 0x56609, // onstalled
- 0x1ad: 0x28d06, // script
- 0x1ae: 0x30006, // spacer
- 0x1af: 0x52c08, // onresize
- 0x1b0: 0x49b0b, // onmouseover
- 0x1b1: 0x59108, // onunload
- 0x1b2: 0x54208, // onseeked
- 0x1b4: 0x2330d, // typemustmatch
- 0x1b5: 0x1f106, // figure
- 0x1b6: 0x48e0a, // onmouseout
- 0x1b7: 0x27f03, // pre
- 0x1b8: 0x4e205, // width
- 0x1bb: 0x7404, // nobr
- 0x1be: 0x7002, // tt
- 0x1bf: 0x1105, // align
- 0x1c0: 0x3f407, // oninput
- 0x1c3: 0x42107, // onkeyup
- 0x1c6: 0x1e50c, // onafterprint
- 0x1c7: 0x210e, // accept-charset
- 0x1c8: 0x9806, // itemid
- 0x1cb: 0x50e06, // strike
- 0x1cc: 0x57a03, // sub
- 0x1cd: 0xf905, // track
- 0x1ce: 0x39705, // start
- 0x1d0: 0x11408, // basefont
- 0x1d6: 0x1cf06, // source
- 0x1d7: 0x1a806, // legend
- 0x1d8: 0x2f905, // thead
- 0x1da: 0x2e905, // scope
- 0x1dd: 0x21106, // object
- 0x1de: 0xa205, // media
- 0x1df: 0x18d0a, // annotation
- 0x1e0: 0x22c0b, // formenctype
- 0x1e2: 0x28b08, // noscript
- 0x1e4: 0x53005, // sizes
- 0x1e5: 0xd50c, // autocomplete
- 0x1e6: 0x7a04, // span
- 0x1e7: 0x8508, // noframes
- 0x1e8: 0x26a06, // target
- 0x1e9: 0x3a006, // ondrop
- 0x1ea: 0x3d406, // srcdoc
- 0x1ec: 0x5e08, // reversed
- 0x1f0: 0x2c707, // isindex
- 0x1f3: 0x29808, // hreflang
- 0x1f5: 0x4e602, // h5
- 0x1f6: 0x5d507, // address
- 0x1fa: 0x30603, // max
- 0x1fb: 0xc70b, // placeholder
- 0x1fc: 0x31408, // textarea
- 0x1fe: 0x4a609, // onmouseup
- 0x1ff: 0x3910b, // ondragstart
-}
-
-const atomText = "abbradiogrouparamalignmarkbdialogaccept-charsetbodyaccesskey" +
- "genavaluealtdescanvasidefaultfootereversedetailsampatternobr" +
- "owspanoembedfnoframesetitleasyncitemidirnamediagroupingaudio" +
- "ncancelabelooptgrouplaceholderubyautocompleteautofocusandbox" +
- "mplaintextrackindisabledivarautoplaybasefontimeupdatebdoncan" +
- "playthrough1bgsoundlowbrbigblinkblockquoteborderbuttonabortr" +
- "anslatecodefercolgroupostercolorcolspannotation-xmlcommandra" +
- "ggablegendcontrolshapecoordsmallcrossoriginsourcefieldsetfig" +
- "captionafterprintfigurequiredforeignObjectforeignobjectforma" +
- "ctionbeforeprintformenctypemustmatchallengeformmethodformnov" +
- "alidatetimeterformtargeth6heightmlhgroupreloadhiddenoscripth" +
- "igh2hreflanghttp-equivideonclickiframeimageimglyph3isindexis" +
- "mappletitemrefacenteritemscopeditemtypematheaderspacermaxlen" +
- "gth4minmtextareadonlymultiplemutedoncloseamlesspellcheckedon" +
- "contextmenuoncuechangeondblclickondragendondragenterondragle" +
- "aveondragoverondragstarticleondropzonemptiedondurationchange" +
- "onendedonerroronfocusrcdoclassectionbluronhashchangeoninputo" +
- "ninvalidonkeydownloadonkeypressrclangonkeyupublicontentedita" +
- "bleonloadeddatalistingonloadedmetadatabindexonloadstartonmes" +
- "sageonmousedownonmousemoveonmouseoutputonmouseoveronmouseupo" +
- "nmousewheelonofflinertononlineonpagehidelonpageshowidth5onpa" +
- "usemaponplayingonpopstateonprogresstrikeytypeonratechangeonr" +
- "esetonresizestrongonscrollonseekedonseekingonselectedonshowr" +
- "aponstalledonstorageonsubmitempropenonsuspendonunloadonvolum" +
- "echangeonwaitingoptimumanifestepromptoptionbeforeunloaddress" +
- "tylesummarysupsvgsystemarquee"
diff --git a/Godeps/_workspace/src/code.google.com/p/go.net/html/atom/table_test.go b/Godeps/_workspace/src/code.google.com/p/go.net/html/atom/table_test.go
deleted file mode 100644
index db016a1..0000000
--- a/Godeps/_workspace/src/code.google.com/p/go.net/html/atom/table_test.go
+++ /dev/null
@@ -1,341 +0,0 @@
-// generated by go run gen.go -test; DO NOT EDIT
-
-package atom
-
-var testAtomList = []string{
- "a",
- "abbr",
- "accept",
- "accept-charset",
- "accesskey",
- "action",
- "address",
- "align",
- "alt",
- "annotation",
- "annotation-xml",
- "applet",
- "area",
- "article",
- "aside",
- "async",
- "audio",
- "autocomplete",
- "autofocus",
- "autoplay",
- "b",
- "base",
- "basefont",
- "bdi",
- "bdo",
- "bgsound",
- "big",
- "blink",
- "blockquote",
- "body",
- "border",
- "br",
- "button",
- "canvas",
- "caption",
- "center",
- "challenge",
- "charset",
- "checked",
- "cite",
- "cite",
- "class",
- "code",
- "col",
- "colgroup",
- "color",
- "cols",
- "colspan",
- "command",
- "command",
- "content",
- "contenteditable",
- "contextmenu",
- "controls",
- "coords",
- "crossorigin",
- "data",
- "data",
- "datalist",
- "datetime",
- "dd",
- "default",
- "defer",
- "del",
- "desc",
- "details",
- "dfn",
- "dialog",
- "dir",
- "dirname",
- "disabled",
- "div",
- "dl",
- "download",
- "draggable",
- "dropzone",
- "dt",
- "em",
- "embed",
- "enctype",
- "face",
- "fieldset",
- "figcaption",
- "figure",
- "font",
- "footer",
- "for",
- "foreignObject",
- "foreignobject",
- "form",
- "form",
- "formaction",
- "formenctype",
- "formmethod",
- "formnovalidate",
- "formtarget",
- "frame",
- "frameset",
- "h1",
- "h2",
- "h3",
- "h4",
- "h5",
- "h6",
- "head",
- "header",
- "headers",
- "height",
- "hgroup",
- "hidden",
- "high",
- "hr",
- "href",
- "hreflang",
- "html",
- "http-equiv",
- "i",
- "icon",
- "id",
- "iframe",
- "image",
- "img",
- "inert",
- "input",
- "ins",
- "isindex",
- "ismap",
- "itemid",
- "itemprop",
- "itemref",
- "itemscope",
- "itemtype",
- "kbd",
- "keygen",
- "keytype",
- "kind",
- "label",
- "label",
- "lang",
- "legend",
- "li",
- "link",
- "list",
- "listing",
- "loop",
- "low",
- "malignmark",
- "manifest",
- "map",
- "mark",
- "marquee",
- "math",
- "max",
- "maxlength",
- "media",
- "mediagroup",
- "menu",
- "meta",
- "meter",
- "method",
- "mglyph",
- "mi",
- "min",
- "mn",
- "mo",
- "ms",
- "mtext",
- "multiple",
- "muted",
- "name",
- "nav",
- "nobr",
- "noembed",
- "noframes",
- "noscript",
- "novalidate",
- "object",
- "ol",
- "onabort",
- "onafterprint",
- "onbeforeprint",
- "onbeforeunload",
- "onblur",
- "oncancel",
- "oncanplay",
- "oncanplaythrough",
- "onchange",
- "onclick",
- "onclose",
- "oncontextmenu",
- "oncuechange",
- "ondblclick",
- "ondrag",
- "ondragend",
- "ondragenter",
- "ondragleave",
- "ondragover",
- "ondragstart",
- "ondrop",
- "ondurationchange",
- "onemptied",
- "onended",
- "onerror",
- "onfocus",
- "onhashchange",
- "oninput",
- "oninvalid",
- "onkeydown",
- "onkeypress",
- "onkeyup",
- "onload",
- "onloadeddata",
- "onloadedmetadata",
- "onloadstart",
- "onmessage",
- "onmousedown",
- "onmousemove",
- "onmouseout",
- "onmouseover",
- "onmouseup",
- "onmousewheel",
- "onoffline",
- "ononline",
- "onpagehide",
- "onpageshow",
- "onpause",
- "onplay",
- "onplaying",
- "onpopstate",
- "onprogress",
- "onratechange",
- "onreset",
- "onresize",
- "onscroll",
- "onseeked",
- "onseeking",
- "onselect",
- "onshow",
- "onstalled",
- "onstorage",
- "onsubmit",
- "onsuspend",
- "ontimeupdate",
- "onunload",
- "onvolumechange",
- "onwaiting",
- "open",
- "optgroup",
- "optimum",
- "option",
- "output",
- "p",
- "param",
- "pattern",
- "ping",
- "placeholder",
- "plaintext",
- "poster",
- "pre",
- "preload",
- "progress",
- "prompt",
- "public",
- "q",
- "radiogroup",
- "readonly",
- "rel",
- "required",
- "reversed",
- "rows",
- "rowspan",
- "rp",
- "rt",
- "ruby",
- "s",
- "samp",
- "sandbox",
- "scope",
- "scoped",
- "script",
- "seamless",
- "section",
- "select",
- "selected",
- "shape",
- "size",
- "sizes",
- "small",
- "source",
- "spacer",
- "span",
- "span",
- "spellcheck",
- "src",
- "srcdoc",
- "srclang",
- "start",
- "step",
- "strike",
- "strong",
- "style",
- "style",
- "sub",
- "summary",
- "sup",
- "svg",
- "system",
- "tabindex",
- "table",
- "target",
- "tbody",
- "td",
- "textarea",
- "tfoot",
- "th",
- "thead",
- "time",
- "title",
- "title",
- "tr",
- "track",
- "translate",
- "tt",
- "type",
- "typemustmatch",
- "u",
- "ul",
- "usemap",
- "value",
- "var",
- "video",
- "wbr",
- "width",
- "wrap",
- "xmp",
-}
diff --git a/Godeps/_workspace/src/code.google.com/p/go.net/html/charset/charset.go b/Godeps/_workspace/src/code.google.com/p/go.net/html/charset/charset.go
deleted file mode 100644
index 39dc268..0000000
--- a/Godeps/_workspace/src/code.google.com/p/go.net/html/charset/charset.go
+++ /dev/null
@@ -1,227 +0,0 @@
-// Package charset provides common text encodings for HTML documents.
-//
-// The mapping from encoding labels to encodings is defined at
-// http://encoding.spec.whatwg.org.
-package charset
-
-import (
- "bytes"
- "io"
- "mime"
- "strings"
- "unicode/utf8"
-
- "code.google.com/p/go.net/html"
- "code.google.com/p/go.text/encoding"
- "code.google.com/p/go.text/encoding/charmap"
- "code.google.com/p/go.text/transform"
-)
-
-// Lookup returns the encoding with the specified label, and its canonical
-// name. It returns nil and the empty string if label is not one of the
-// standard encodings for HTML. Matching is case-insensitive and ignores
-// leading and trailing whitespace.
-func Lookup(label string) (e encoding.Encoding, name string) {
- label = strings.ToLower(strings.Trim(label, "\t\n\r\f "))
- enc := encodings[label]
- return enc.e, enc.name
-}
-
-// DetermineEncoding determines the encoding of an HTML document by examining
-// up to the first 1024 bytes of content and the declared Content-Type.
-//
-// See http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#determining-the-character-encoding
-func DetermineEncoding(content []byte, contentType string) (e encoding.Encoding, name string, certain bool) {
- if len(content) > 1024 {
- content = content[:1024]
- }
-
- for _, b := range boms {
- if bytes.HasPrefix(content, b.bom) {
- e, name = Lookup(b.enc)
- return e, name, true
- }
- }
-
- if _, params, err := mime.ParseMediaType(contentType); err == nil {
- if cs, ok := params["charset"]; ok {
- if e, name = Lookup(cs); e != nil {
- return e, name, true
- }
- }
- }
-
- if len(content) > 0 {
- e, name = prescan(content)
- if e != nil {
- return e, name, false
- }
- }
-
- // Try to detect UTF-8.
- // First eliminate any partial rune at the end.
- for i := len(content) - 1; i >= 0 && i > len(content)-4; i-- {
- b := content[i]
- if b < 0x80 {
- break
- }
- if utf8.RuneStart(b) {
- content = content[:i]
- break
- }
- }
- hasHighBit := false
- for _, c := range content {
- if c >= 0x80 {
- hasHighBit = true
- break
- }
- }
- if hasHighBit && utf8.Valid(content) {
- return encoding.Nop, "utf-8", false
- }
-
- // TODO: change default depending on user's locale?
- return charmap.Windows1252, "windows-1252", false
-}
-
-// NewReader returns an io.Reader that converts the content of r to UTF-8.
-// It calls DetermineEncoding to find out what r's encoding is.
-func NewReader(r io.Reader, contentType string) (io.Reader, error) {
- preview := make([]byte, 1024)
- n, err := io.ReadFull(r, preview)
- switch {
- case err == io.ErrUnexpectedEOF:
- preview = preview[:n]
- r = bytes.NewReader(preview)
- case err != nil:
- return nil, err
- default:
- r = io.MultiReader(bytes.NewReader(preview), r)
- }
-
- if e, _, _ := DetermineEncoding(preview, contentType); e != encoding.Nop {
- r = transform.NewReader(r, e.NewDecoder())
- }
- return r, nil
-}
-
-func prescan(content []byte) (e encoding.Encoding, name string) {
- z := html.NewTokenizer(bytes.NewReader(content))
- for {
- switch z.Next() {
- case html.ErrorToken:
- return nil, ""
-
- case html.StartTagToken, html.SelfClosingTagToken:
- tagName, hasAttr := z.TagName()
- if !bytes.Equal(tagName, []byte("meta")) {
- continue
- }
- attrList := make(map[string]bool)
- gotPragma := false
-
- const (
- dontKnow = iota
- doNeedPragma
- doNotNeedPragma
- )
- needPragma := dontKnow
-
- name = ""
- e = nil
- for hasAttr {
- var key, val []byte
- key, val, hasAttr = z.TagAttr()
- ks := string(key)
- if attrList[ks] {
- continue
- }
- attrList[ks] = true
- for i, c := range val {
- if 'A' <= c && c <= 'Z' {
- val[i] = c + 0x20
- }
- }
-
- switch ks {
- case "http-equiv":
- if bytes.Equal(val, []byte("content-type")) {
- gotPragma = true
- }
-
- case "content":
- if e == nil {
- name = fromMetaElement(string(val))
- if name != "" {
- e, name = Lookup(name)
- if e != nil {
- needPragma = doNeedPragma
- }
- }
- }
-
- case "charset":
- e, name = Lookup(string(val))
- needPragma = doNotNeedPragma
- }
- }
-
- if needPragma == dontKnow || needPragma == doNeedPragma && !gotPragma {
- continue
- }
-
- if strings.HasPrefix(name, "utf-16") {
- name = "utf-8"
- e = encoding.Nop
- }
-
- if e != nil {
- return e, name
- }
- }
- }
-}
-
-func fromMetaElement(s string) string {
- for s != "" {
- csLoc := strings.Index(s, "charset")
- if csLoc == -1 {
- return ""
- }
- s = s[csLoc+len("charset"):]
- s = strings.TrimLeft(s, " \t\n\f\r")
- if !strings.HasPrefix(s, "=") {
- continue
- }
- s = s[1:]
- s = strings.TrimLeft(s, " \t\n\f\r")
- if s == "" {
- return ""
- }
- if q := s[0]; q == '"' || q == '\'' {
- s = s[1:]
- closeQuote := strings.IndexRune(s, rune(q))
- if closeQuote == -1 {
- return ""
- }
- return s[:closeQuote]
- }
-
- end := strings.IndexAny(s, "; \t\n\f\r")
- if end == -1 {
- end = len(s)
- }
- return s[:end]
- }
- return ""
-}
-
-var boms = []struct {
- bom []byte
- enc string
-}{
- {[]byte{0xfe, 0xff}, "utf-16be"},
- {[]byte{0xff, 0xfe}, "utf-16le"},
- {[]byte{0xef, 0xbb, 0xbf}, "utf-8"},
-}
diff --git a/Godeps/_workspace/src/code.google.com/p/go.net/html/charset/charset_test.go b/Godeps/_workspace/src/code.google.com/p/go.net/html/charset/charset_test.go
deleted file mode 100644
index a656dd9..0000000
--- a/Godeps/_workspace/src/code.google.com/p/go.net/html/charset/charset_test.go
+++ /dev/null
@@ -1,200 +0,0 @@
-package charset
-
-import (
- "bytes"
- "io/ioutil"
- "strings"
- "testing"
-
- "code.google.com/p/go.text/transform"
-)
-
-func transformString(t transform.Transformer, s string) (string, error) {
- r := transform.NewReader(strings.NewReader(s), t)
- b, err := ioutil.ReadAll(r)
- return string(b), err
-}
-
-var testCases = []struct {
- utf8, other, otherEncoding string
-}{
- {"Résumé", "Résumé", "utf8"},
- {"Résumé", "R\xe9sum\xe9", "latin1"},
- {"これは漢字です。", "S0\x8c0o0\"oW[g0Y0\x020", "UTF-16LE"},
- {"これは漢字です。", "0S0\x8c0oo\"[W0g0Y0\x02", "UTF-16BE"},
- {"Hello, world", "Hello, world", "ASCII"},
- {"Gdańsk", "Gda\xf1sk", "ISO-8859-2"},
- {"Ââ Čč Đđ Ŋŋ Õõ Šš Žž Åå Ää", "\xc2\xe2 \xc8\xe8 \xa9\xb9 \xaf\xbf \xd5\xf5 \xaa\xba \xac\xbc \xc5\xe5 \xc4\xe4", "ISO-8859-10"},
- {"สำหรับ", "\xca\xd3\xcb\xc3\u047a", "ISO-8859-11"},
- {"latviešu", "latvie\xf0u", "ISO-8859-13"},
- {"Seònaid", "Se\xf2naid", "ISO-8859-14"},
- {"€1 is cheap", "\xa41 is cheap", "ISO-8859-15"},
- {"românește", "rom\xe2ne\xbate", "ISO-8859-16"},
- {"nutraĵo", "nutra\xbco", "ISO-8859-3"},
- {"Kalâdlit", "Kal\xe2dlit", "ISO-8859-4"},
- {"русский", "\xe0\xe3\xe1\xe1\xda\xd8\xd9", "ISO-8859-5"},
- {"ελληνικά", "\xe5\xeb\xeb\xe7\xed\xe9\xea\xdc", "ISO-8859-7"},
- {"Kağan", "Ka\xf0an", "ISO-8859-9"},
- {"Résumé", "R\x8esum\x8e", "macintosh"},
- {"Gdańsk", "Gda\xf1sk", "windows-1250"},
- {"русский", "\xf0\xf3\xf1\xf1\xea\xe8\xe9", "windows-1251"},
- {"Résumé", "R\xe9sum\xe9", "windows-1252"},
- {"ελληνικά", "\xe5\xeb\xeb\xe7\xed\xe9\xea\xdc", "windows-1253"},
- {"Kağan", "Ka\xf0an", "windows-1254"},
- {"עִבְרִית", "\xf2\xc4\xe1\xc0\xf8\xc4\xe9\xfa", "windows-1255"},
- {"العربية", "\xc7\xe1\xda\xd1\xc8\xed\xc9", "windows-1256"},
- {"latviešu", "latvie\xf0u", "windows-1257"},
- {"Việt", "Vi\xea\xf2t", "windows-1258"},
- {"สำหรับ", "\xca\xd3\xcb\xc3\u047a", "windows-874"},
- {"русский", "\xd2\xd5\xd3\xd3\xcb\xc9\xca", "KOI8-R"},
- {"українська", "\xd5\xcb\xd2\xc1\xa7\xce\xd3\xd8\xcb\xc1", "KOI8-U"},
- {"Hello 常用國字標準字體表", "Hello \xb1`\xa5\u03b0\xea\xa6r\xbc\u0437\u01e6r\xc5\xe9\xaa\xed", "big5"},
- {"Hello 常用國字標準字體表", "Hello \xb3\xa3\xd3\xc3\x87\xf8\xd7\xd6\x98\xcb\x9c\xca\xd7\xd6\xf3\x77\xb1\xed", "gbk"},
- {"Hello 常用國字標準字體表", "Hello \xb3\xa3\xd3\xc3\x87\xf8\xd7\xd6\x98\xcb\x9c\xca\xd7\xd6\xf3\x77\xb1\xed", "gb18030"},
- {"עִבְרִית", "\x81\x30\xfb\x30\x81\x30\xf6\x34\x81\x30\xf9\x33\x81\x30\xf6\x30\x81\x30\xfb\x36\x81\x30\xf6\x34\x81\x30\xfa\x31\x81\x30\xfb\x38", "gb18030"},
- {"㧯", "\x82\x31\x89\x38", "gb18030"},
- {"これは漢字です。", "\x82\xb1\x82\xea\x82\xcd\x8a\xbf\x8e\x9a\x82\xc5\x82\xb7\x81B", "SJIS"},
- {"Hello, 世界!", "Hello, \x90\xa2\x8aE!", "SJIS"},
- {"イウエオカ", "\xb2\xb3\xb4\xb5\xb6", "SJIS"},
- {"これは漢字です。", "\xa4\xb3\xa4\xec\xa4\u03f4\xc1\xbb\xfa\xa4\u01e4\xb9\xa1\xa3", "EUC-JP"},
- {"Hello, 世界!", "Hello, \x1b$B@$3&\x1b(B!", "ISO-2022-JP"},
- {"네이트 | 즐거움의 시작, 슈파스(Spaβ) NATE", "\xb3\xd7\xc0\xcc\xc6\xae | \xc1\xf1\xb0\xc5\xbf\xf2\xc0\xc7 \xbd\xc3\xc0\xdb, \xbd\xb4\xc6\xc4\xbd\xba(Spa\xa5\xe2) NATE", "EUC-KR"},
-}
-
-func TestDecode(t *testing.T) {
- for _, tc := range testCases {
- e, _ := Lookup(tc.otherEncoding)
- if e == nil {
- t.Errorf("%s: not found", tc.otherEncoding)
- continue
- }
- s, err := transformString(e.NewDecoder(), tc.other)
- if err != nil {
- t.Errorf("%s: decode %q: %v", tc.otherEncoding, tc.other, err)
- continue
- }
- if s != tc.utf8 {
- t.Errorf("%s: got %q, want %q", tc.otherEncoding, s, tc.utf8)
- }
- }
-}
-
-func TestEncode(t *testing.T) {
- for _, tc := range testCases {
- e, _ := Lookup(tc.otherEncoding)
- if e == nil {
- t.Errorf("%s: not found", tc.otherEncoding)
- continue
- }
- s, err := transformString(e.NewEncoder(), tc.utf8)
- if err != nil {
- t.Errorf("%s: encode %q: %s", tc.otherEncoding, tc.utf8, err)
- continue
- }
- if s != tc.other {
- t.Errorf("%s: got %q, want %q", tc.otherEncoding, s, tc.other)
- }
- }
-}
-
-// TestNames verifies that you can pass an encoding's name to Lookup and get
-// the same encoding back (except for "replacement").
-func TestNames(t *testing.T) {
- for _, e := range encodings {
- if e.name == "replacement" {
- continue
- }
- _, got := Lookup(e.name)
- if got != e.name {
- t.Errorf("got %q, want %q", got, e.name)
- continue
- }
- }
-}
-
-var sniffTestCases = []struct {
- filename, declared, want string
-}{
- {"HTTP-charset.html", "text/html; charset=iso-8859-15", "iso-8859-15"},
- {"UTF-16LE-BOM.html", "", "utf-16le"},
- {"UTF-16BE-BOM.html", "", "utf-16be"},
- {"meta-content-attribute.html", "text/html", "iso-8859-15"},
- {"meta-charset-attribute.html", "text/html", "iso-8859-15"},
- {"No-encoding-declaration.html", "text/html", "utf-8"},
- {"HTTP-vs-UTF-8-BOM.html", "text/html; charset=iso-8859-15", "utf-8"},
- {"HTTP-vs-meta-content.html", "text/html; charset=iso-8859-15", "iso-8859-15"},
- {"HTTP-vs-meta-charset.html", "text/html; charset=iso-8859-15", "iso-8859-15"},
- {"UTF-8-BOM-vs-meta-content.html", "text/html", "utf-8"},
- {"UTF-8-BOM-vs-meta-charset.html", "text/html", "utf-8"},
-}
-
-func TestSniff(t *testing.T) {
- for _, tc := range sniffTestCases {
- content, err := ioutil.ReadFile("testdata/" + tc.filename)
- if err != nil {
- t.Errorf("%s: error reading file: %v", tc.filename, err)
- continue
- }
-
- _, name, _ := DetermineEncoding(content, tc.declared)
- if name != tc.want {
- t.Errorf("%s: got %q, want %q", tc.filename, name, tc.want)
- continue
- }
- }
-}
-
-func TestReader(t *testing.T) {
- for _, tc := range sniffTestCases {
- content, err := ioutil.ReadFile("testdata/" + tc.filename)
- if err != nil {
- t.Errorf("%s: error reading file: %v", tc.filename, err)
- continue
- }
-
- r, err := NewReader(bytes.NewReader(content), tc.declared)
- if err != nil {
- t.Errorf("%s: error creating reader: %v", tc.filename, err)
- continue
- }
-
- got, err := ioutil.ReadAll(r)
- if err != nil {
- t.Errorf("%s: error reading from charset.NewReader: %v", tc.filename, err)
- continue
- }
-
- e, _ := Lookup(tc.want)
- want, err := ioutil.ReadAll(transform.NewReader(bytes.NewReader(content), e.NewDecoder()))
- if err != nil {
- t.Errorf("%s: error decoding with hard-coded charset name: %v", tc.filename, err)
- continue
- }
-
- if !bytes.Equal(got, want) {
- t.Errorf("%s: got %q, want %q", tc.filename, got, want)
- continue
- }
- }
-}
-
-var metaTestCases = []struct {
- meta, want string
-}{
- {"", ""},
- {"text/html", ""},
- {"text/html; charset utf-8", ""},
- {"text/html; charset=latin-2", "latin-2"},
- {"text/html; charset; charset = utf-8", "utf-8"},
- {`charset="big5"`, "big5"},
- {"charset='shift_jis'", "shift_jis"},
-}
-
-func TestFromMeta(t *testing.T) {
- for _, tc := range metaTestCases {
- got := fromMetaElement(tc.meta)
- if got != tc.want {
- t.Errorf("%q: got %q, want %q", tc.meta, got, tc.want)
- }
- }
-}
diff --git a/Godeps/_workspace/src/code.google.com/p/go.net/html/charset/gen.go b/Godeps/_workspace/src/code.google.com/p/go.net/html/charset/gen.go
deleted file mode 100644
index 25a9eb6..0000000
--- a/Godeps/_workspace/src/code.google.com/p/go.net/html/charset/gen.go
+++ /dev/null
@@ -1,107 +0,0 @@
-// +build ignore
-
-package main
-
-// Download http://encoding.spec.whatwg.org/encodings.json and use it to
-// generate table.go.
-
-import (
- "encoding/json"
- "fmt"
- "log"
- "net/http"
- "strings"
-)
-
-type enc struct {
- Name string
- Labels []string
-}
-
-type group struct {
- Encodings []enc
- Heading string
-}
-
-const specURL = "http://encoding.spec.whatwg.org/encodings.json"
-
-func main() {
- resp, err := http.Get(specURL)
- if err != nil {
- log.Fatalf("error fetching %s: %s", specURL, err)
- }
- if resp.StatusCode != 200 {
- log.Fatalf("error fetching %s: HTTP status %s", specURL, resp.Status)
- }
- defer resp.Body.Close()
-
- var groups []group
- d := json.NewDecoder(resp.Body)
- err = d.Decode(&groups)
- if err != nil {
- log.Fatalf("error reading encodings.json: %s", err)
- }
-
- fmt.Println("// generated by go run gen.go; DO NOT EDIT")
- fmt.Println()
- fmt.Println("package charset")
- fmt.Println()
-
- fmt.Println("import (")
- fmt.Println(`"code.google.com/p/go.text/encoding"`)
- for _, pkg := range []string{"charmap", "japanese", "korean", "simplifiedchinese", "traditionalchinese", "unicode"} {
- fmt.Printf("\"code.google.com/p/go.text/encoding/%s\"\n", pkg)
- }
- fmt.Println(")")
- fmt.Println()
-
- fmt.Println("var encodings = map[string]struct{e encoding.Encoding; name string} {")
- for _, g := range groups {
- for _, e := range g.Encodings {
- goName, ok := miscNames[e.Name]
- if !ok {
- for k, v := range prefixes {
- if strings.HasPrefix(e.Name, k) {
- goName = v + e.Name[len(k):]
- break
- }
- }
- if goName == "" {
- log.Fatalf("unrecognized encoding name: %s", e.Name)
- }
- }
-
- for _, label := range e.Labels {
- fmt.Printf("%q: {%s, %q},\n", label, goName, e.Name)
- }
- }
- }
- fmt.Println("}")
-}
-
-var prefixes = map[string]string{
- "iso-8859-": "charmap.ISO8859_",
- "windows-": "charmap.Windows",
-}
-
-var miscNames = map[string]string{
- "utf-8": "encoding.Nop",
- "ibm866": "charmap.CodePage866",
- "iso-8859-8-i": "charmap.ISO8859_8",
- "koi8-r": "charmap.KOI8R",
- "koi8-u": "charmap.KOI8U",
- "macintosh": "charmap.Macintosh",
- "x-mac-cyrillic": "charmap.MacintoshCyrillic",
- "gbk": "simplifiedchinese.GBK",
- "gb18030": "simplifiedchinese.GB18030",
- "hz-gb-2312": "simplifiedchinese.HZGB2312",
- "big5": "traditionalchinese.Big5",
- "euc-jp": "japanese.EUCJP",
- "iso-2022-jp": "japanese.ISO2022JP",
- "shift_jis": "japanese.ShiftJIS",
- "euc-kr": "korean.EUCKR",
- "replacement": "encoding.Replacement",
- "utf-16be": "unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM)",
- "utf-16le": "unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM)",
- "x-user-defined": "charmap.XUserDefined",
-}
diff --git a/Godeps/_workspace/src/code.google.com/p/go.net/html/charset/table.go b/Godeps/_workspace/src/code.google.com/p/go.net/html/charset/table.go
deleted file mode 100644
index 66f8af1..0000000
--- a/Godeps/_workspace/src/code.google.com/p/go.net/html/charset/table.go
+++ /dev/null
@@ -1,235 +0,0 @@
-// generated by go run gen.go; DO NOT EDIT
-
-package charset
-
-import (
- "code.google.com/p/go.text/encoding"
- "code.google.com/p/go.text/encoding/charmap"
- "code.google.com/p/go.text/encoding/japanese"
- "code.google.com/p/go.text/encoding/korean"
- "code.google.com/p/go.text/encoding/simplifiedchinese"
- "code.google.com/p/go.text/encoding/traditionalchinese"
- "code.google.com/p/go.text/encoding/unicode"
-)
-
-var encodings = map[string]struct {
- e encoding.Encoding
- name string
-}{
- "unicode-1-1-utf-8": {encoding.Nop, "utf-8"},
- "utf-8": {encoding.Nop, "utf-8"},
- "utf8": {encoding.Nop, "utf-8"},
- "866": {charmap.CodePage866, "ibm866"},
- "cp866": {charmap.CodePage866, "ibm866"},
- "csibm866": {charmap.CodePage866, "ibm866"},
- "ibm866": {charmap.CodePage866, "ibm866"},
- "csisolatin2": {charmap.ISO8859_2, "iso-8859-2"},
- "iso-8859-2": {charmap.ISO8859_2, "iso-8859-2"},
- "iso-ir-101": {charmap.ISO8859_2, "iso-8859-2"},
- "iso8859-2": {charmap.ISO8859_2, "iso-8859-2"},
- "iso88592": {charmap.ISO8859_2, "iso-8859-2"},
- "iso_8859-2": {charmap.ISO8859_2, "iso-8859-2"},
- "iso_8859-2:1987": {charmap.ISO8859_2, "iso-8859-2"},
- "l2": {charmap.ISO8859_2, "iso-8859-2"},
- "latin2": {charmap.ISO8859_2, "iso-8859-2"},
- "csisolatin3": {charmap.ISO8859_3, "iso-8859-3"},
- "iso-8859-3": {charmap.ISO8859_3, "iso-8859-3"},
- "iso-ir-109": {charmap.ISO8859_3, "iso-8859-3"},
- "iso8859-3": {charmap.ISO8859_3, "iso-8859-3"},
- "iso88593": {charmap.ISO8859_3, "iso-8859-3"},
- "iso_8859-3": {charmap.ISO8859_3, "iso-8859-3"},
- "iso_8859-3:1988": {charmap.ISO8859_3, "iso-8859-3"},
- "l3": {charmap.ISO8859_3, "iso-8859-3"},
- "latin3": {charmap.ISO8859_3, "iso-8859-3"},
- "csisolatin4": {charmap.ISO8859_4, "iso-8859-4"},
- "iso-8859-4": {charmap.ISO8859_4, "iso-8859-4"},
- "iso-ir-110": {charmap.ISO8859_4, "iso-8859-4"},
- "iso8859-4": {charmap.ISO8859_4, "iso-8859-4"},
- "iso88594": {charmap.ISO8859_4, "iso-8859-4"},
- "iso_8859-4": {charmap.ISO8859_4, "iso-8859-4"},
- "iso_8859-4:1988": {charmap.ISO8859_4, "iso-8859-4"},
- "l4": {charmap.ISO8859_4, "iso-8859-4"},
- "latin4": {charmap.ISO8859_4, "iso-8859-4"},
- "csisolatincyrillic": {charmap.ISO8859_5, "iso-8859-5"},
- "cyrillic": {charmap.ISO8859_5, "iso-8859-5"},
- "iso-8859-5": {charmap.ISO8859_5, "iso-8859-5"},
- "iso-ir-144": {charmap.ISO8859_5, "iso-8859-5"},
- "iso8859-5": {charmap.ISO8859_5, "iso-8859-5"},
- "iso88595": {charmap.ISO8859_5, "iso-8859-5"},
- "iso_8859-5": {charmap.ISO8859_5, "iso-8859-5"},
- "iso_8859-5:1988": {charmap.ISO8859_5, "iso-8859-5"},
- "arabic": {charmap.ISO8859_6, "iso-8859-6"},
- "asmo-708": {charmap.ISO8859_6, "iso-8859-6"},
- "csiso88596e": {charmap.ISO8859_6, "iso-8859-6"},
- "csiso88596i": {charmap.ISO8859_6, "iso-8859-6"},
- "csisolatinarabic": {charmap.ISO8859_6, "iso-8859-6"},
- "ecma-114": {charmap.ISO8859_6, "iso-8859-6"},
- "iso-8859-6": {charmap.ISO8859_6, "iso-8859-6"},
- "iso-8859-6-e": {charmap.ISO8859_6, "iso-8859-6"},
- "iso-8859-6-i": {charmap.ISO8859_6, "iso-8859-6"},
- "iso-ir-127": {charmap.ISO8859_6, "iso-8859-6"},
- "iso8859-6": {charmap.ISO8859_6, "iso-8859-6"},
- "iso88596": {charmap.ISO8859_6, "iso-8859-6"},
- "iso_8859-6": {charmap.ISO8859_6, "iso-8859-6"},
- "iso_8859-6:1987": {charmap.ISO8859_6, "iso-8859-6"},
- "csisolatingreek": {charmap.ISO8859_7, "iso-8859-7"},
- "ecma-118": {charmap.ISO8859_7, "iso-8859-7"},
- "elot_928": {charmap.ISO8859_7, "iso-8859-7"},
- "greek": {charmap.ISO8859_7, "iso-8859-7"},
- "greek8": {charmap.ISO8859_7, "iso-8859-7"},
- "iso-8859-7": {charmap.ISO8859_7, "iso-8859-7"},
- "iso-ir-126": {charmap.ISO8859_7, "iso-8859-7"},
- "iso8859-7": {charmap.ISO8859_7, "iso-8859-7"},
- "iso88597": {charmap.ISO8859_7, "iso-8859-7"},
- "iso_8859-7": {charmap.ISO8859_7, "iso-8859-7"},
- "iso_8859-7:1987": {charmap.ISO8859_7, "iso-8859-7"},
- "sun_eu_greek": {charmap.ISO8859_7, "iso-8859-7"},
- "csiso88598e": {charmap.ISO8859_8, "iso-8859-8"},
- "csisolatinhebrew": {charmap.ISO8859_8, "iso-8859-8"},
- "hebrew": {charmap.ISO8859_8, "iso-8859-8"},
- "iso-8859-8": {charmap.ISO8859_8, "iso-8859-8"},
- "iso-8859-8-e": {charmap.ISO8859_8, "iso-8859-8"},
- "iso-ir-138": {charmap.ISO8859_8, "iso-8859-8"},
- "iso8859-8": {charmap.ISO8859_8, "iso-8859-8"},
- "iso88598": {charmap.ISO8859_8, "iso-8859-8"},
- "iso_8859-8": {charmap.ISO8859_8, "iso-8859-8"},
- "iso_8859-8:1988": {charmap.ISO8859_8, "iso-8859-8"},
- "visual": {charmap.ISO8859_8, "iso-8859-8"},
- "csiso88598i": {charmap.ISO8859_8, "iso-8859-8-i"},
- "iso-8859-8-i": {charmap.ISO8859_8, "iso-8859-8-i"},
- "logical": {charmap.ISO8859_8, "iso-8859-8-i"},
- "csisolatin6": {charmap.ISO8859_10, "iso-8859-10"},
- "iso-8859-10": {charmap.ISO8859_10, "iso-8859-10"},
- "iso-ir-157": {charmap.ISO8859_10, "iso-8859-10"},
- "iso8859-10": {charmap.ISO8859_10, "iso-8859-10"},
- "iso885910": {charmap.ISO8859_10, "iso-8859-10"},
- "l6": {charmap.ISO8859_10, "iso-8859-10"},
- "latin6": {charmap.ISO8859_10, "iso-8859-10"},
- "iso-8859-13": {charmap.ISO8859_13, "iso-8859-13"},
- "iso8859-13": {charmap.ISO8859_13, "iso-8859-13"},
- "iso885913": {charmap.ISO8859_13, "iso-8859-13"},
- "iso-8859-14": {charmap.ISO8859_14, "iso-8859-14"},
- "iso8859-14": {charmap.ISO8859_14, "iso-8859-14"},
- "iso885914": {charmap.ISO8859_14, "iso-8859-14"},
- "csisolatin9": {charmap.ISO8859_15, "iso-8859-15"},
- "iso-8859-15": {charmap.ISO8859_15, "iso-8859-15"},
- "iso8859-15": {charmap.ISO8859_15, "iso-8859-15"},
- "iso885915": {charmap.ISO8859_15, "iso-8859-15"},
- "iso_8859-15": {charmap.ISO8859_15, "iso-8859-15"},
- "l9": {charmap.ISO8859_15, "iso-8859-15"},
- "iso-8859-16": {charmap.ISO8859_16, "iso-8859-16"},
- "cskoi8r": {charmap.KOI8R, "koi8-r"},
- "koi": {charmap.KOI8R, "koi8-r"},
- "koi8": {charmap.KOI8R, "koi8-r"},
- "koi8-r": {charmap.KOI8R, "koi8-r"},
- "koi8_r": {charmap.KOI8R, "koi8-r"},
- "koi8-u": {charmap.KOI8U, "koi8-u"},
- "csmacintosh": {charmap.Macintosh, "macintosh"},
- "mac": {charmap.Macintosh, "macintosh"},
- "macintosh": {charmap.Macintosh, "macintosh"},
- "x-mac-roman": {charmap.Macintosh, "macintosh"},
- "dos-874": {charmap.Windows874, "windows-874"},
- "iso-8859-11": {charmap.Windows874, "windows-874"},
- "iso8859-11": {charmap.Windows874, "windows-874"},
- "iso885911": {charmap.Windows874, "windows-874"},
- "tis-620": {charmap.Windows874, "windows-874"},
- "windows-874": {charmap.Windows874, "windows-874"},
- "cp1250": {charmap.Windows1250, "windows-1250"},
- "windows-1250": {charmap.Windows1250, "windows-1250"},
- "x-cp1250": {charmap.Windows1250, "windows-1250"},
- "cp1251": {charmap.Windows1251, "windows-1251"},
- "windows-1251": {charmap.Windows1251, "windows-1251"},
- "x-cp1251": {charmap.Windows1251, "windows-1251"},
- "ansi_x3.4-1968": {charmap.Windows1252, "windows-1252"},
- "ascii": {charmap.Windows1252, "windows-1252"},
- "cp1252": {charmap.Windows1252, "windows-1252"},
- "cp819": {charmap.Windows1252, "windows-1252"},
- "csisolatin1": {charmap.Windows1252, "windows-1252"},
- "ibm819": {charmap.Windows1252, "windows-1252"},
- "iso-8859-1": {charmap.Windows1252, "windows-1252"},
- "iso-ir-100": {charmap.Windows1252, "windows-1252"},
- "iso8859-1": {charmap.Windows1252, "windows-1252"},
- "iso88591": {charmap.Windows1252, "windows-1252"},
- "iso_8859-1": {charmap.Windows1252, "windows-1252"},
- "iso_8859-1:1987": {charmap.Windows1252, "windows-1252"},
- "l1": {charmap.Windows1252, "windows-1252"},
- "latin1": {charmap.Windows1252, "windows-1252"},
- "us-ascii": {charmap.Windows1252, "windows-1252"},
- "windows-1252": {charmap.Windows1252, "windows-1252"},
- "x-cp1252": {charmap.Windows1252, "windows-1252"},
- "cp1253": {charmap.Windows1253, "windows-1253"},
- "windows-1253": {charmap.Windows1253, "windows-1253"},
- "x-cp1253": {charmap.Windows1253, "windows-1253"},
- "cp1254": {charmap.Windows1254, "windows-1254"},
- "csisolatin5": {charmap.Windows1254, "windows-1254"},
- "iso-8859-9": {charmap.Windows1254, "windows-1254"},
- "iso-ir-148": {charmap.Windows1254, "windows-1254"},
- "iso8859-9": {charmap.Windows1254, "windows-1254"},
- "iso88599": {charmap.Windows1254, "windows-1254"},
- "iso_8859-9": {charmap.Windows1254, "windows-1254"},
- "iso_8859-9:1989": {charmap.Windows1254, "windows-1254"},
- "l5": {charmap.Windows1254, "windows-1254"},
- "latin5": {charmap.Windows1254, "windows-1254"},
- "windows-1254": {charmap.Windows1254, "windows-1254"},
- "x-cp1254": {charmap.Windows1254, "windows-1254"},
- "cp1255": {charmap.Windows1255, "windows-1255"},
- "windows-1255": {charmap.Windows1255, "windows-1255"},
- "x-cp1255": {charmap.Windows1255, "windows-1255"},
- "cp1256": {charmap.Windows1256, "windows-1256"},
- "windows-1256": {charmap.Windows1256, "windows-1256"},
- "x-cp1256": {charmap.Windows1256, "windows-1256"},
- "cp1257": {charmap.Windows1257, "windows-1257"},
- "windows-1257": {charmap.Windows1257, "windows-1257"},
- "x-cp1257": {charmap.Windows1257, "windows-1257"},
- "cp1258": {charmap.Windows1258, "windows-1258"},
- "windows-1258": {charmap.Windows1258, "windows-1258"},
- "x-cp1258": {charmap.Windows1258, "windows-1258"},
- "x-mac-cyrillic": {charmap.MacintoshCyrillic, "x-mac-cyrillic"},
- "x-mac-ukrainian": {charmap.MacintoshCyrillic, "x-mac-cyrillic"},
- "chinese": {simplifiedchinese.GBK, "gbk"},
- "csgb2312": {simplifiedchinese.GBK, "gbk"},
- "csiso58gb231280": {simplifiedchinese.GBK, "gbk"},
- "gb2312": {simplifiedchinese.GBK, "gbk"},
- "gb_2312": {simplifiedchinese.GBK, "gbk"},
- "gb_2312-80": {simplifiedchinese.GBK, "gbk"},
- "gbk": {simplifiedchinese.GBK, "gbk"},
- "iso-ir-58": {simplifiedchinese.GBK, "gbk"},
- "x-gbk": {simplifiedchinese.GBK, "gbk"},
- "gb18030": {simplifiedchinese.GB18030, "gb18030"},
- "hz-gb-2312": {simplifiedchinese.HZGB2312, "hz-gb-2312"},
- "big5": {traditionalchinese.Big5, "big5"},
- "big5-hkscs": {traditionalchinese.Big5, "big5"},
- "cn-big5": {traditionalchinese.Big5, "big5"},
- "csbig5": {traditionalchinese.Big5, "big5"},
- "x-x-big5": {traditionalchinese.Big5, "big5"},
- "cseucpkdfmtjapanese": {japanese.EUCJP, "euc-jp"},
- "euc-jp": {japanese.EUCJP, "euc-jp"},
- "x-euc-jp": {japanese.EUCJP, "euc-jp"},
- "csiso2022jp": {japanese.ISO2022JP, "iso-2022-jp"},
- "iso-2022-jp": {japanese.ISO2022JP, "iso-2022-jp"},
- "csshiftjis": {japanese.ShiftJIS, "shift_jis"},
- "ms_kanji": {japanese.ShiftJIS, "shift_jis"},
- "shift-jis": {japanese.ShiftJIS, "shift_jis"},
- "shift_jis": {japanese.ShiftJIS, "shift_jis"},
- "sjis": {japanese.ShiftJIS, "shift_jis"},
- "windows-31j": {japanese.ShiftJIS, "shift_jis"},
- "x-sjis": {japanese.ShiftJIS, "shift_jis"},
- "cseuckr": {korean.EUCKR, "euc-kr"},
- "csksc56011987": {korean.EUCKR, "euc-kr"},
- "euc-kr": {korean.EUCKR, "euc-kr"},
- "iso-ir-149": {korean.EUCKR, "euc-kr"},
- "korean": {korean.EUCKR, "euc-kr"},
- "ks_c_5601-1987": {korean.EUCKR, "euc-kr"},
- "ks_c_5601-1989": {korean.EUCKR, "euc-kr"},
- "ksc5601": {korean.EUCKR, "euc-kr"},
- "ksc_5601": {korean.EUCKR, "euc-kr"},
- "windows-949": {korean.EUCKR, "euc-kr"},
- "csiso2022kr": {encoding.Replacement, "replacement"},
- "iso-2022-kr": {encoding.Replacement, "replacement"},
- "iso-2022-cn": {encoding.Replacement, "replacement"},
- "iso-2022-cn-ext": {encoding.Replacement, "replacement"},
- "utf-16be": {unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM), "utf-16be"},
- "utf-16": {unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM), "utf-16le"},
- "utf-16le": {unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM), "utf-16le"},
- "x-user-defined": {charmap.XUserDefined, "x-user-defined"},
-}
diff --git a/Godeps/_workspace/src/code.google.com/p/go.net/html/charset/testdata/HTTP-charset.html b/Godeps/_workspace/src/code.google.com/p/go.net/html/charset/testdata/HTTP-charset.html
deleted file mode 100644
index 9915fa0..0000000
--- a/Godeps/_workspace/src/code.google.com/p/go.net/html/charset/testdata/HTTP-charset.html
+++ /dev/null
@@ -1,48 +0,0 @@
-
-
-
- HTTP charset The character encoding of a page can be set using the HTTP header charset declaration. The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector The only character encoding declaration for this HTML file is in the HTTP header, which sets the encoding to ISO 8859-15. the-input-byte-stream-001 HTTP vs UTF-8 BOM A character encoding set in the HTTP header has lower precedence than the UTF-8 signature. The HTTP header attempts to set the character encoding to ISO 8859-15. The page starts with a UTF-8 signature. The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector If the test is unsuccessful, the characters  should appear at the top of the page. These represent the bytes that make up the UTF-8 signature when encountered in the ISO 8859-15 encoding. the-input-byte-stream-034 HTTP vs meta charset The HTTP header has a higher precedence than an encoding declaration in a meta charset attribute. The HTTP header attempts to set the character encoding to ISO 8859-15. The page contains an encoding declaration in a meta charset attribute that attempts to set the character encoding to ISO 8859-1. The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector the-input-byte-stream-018 HTTP vs meta content The HTTP header has a higher precedence than an encoding declaration in a meta content attribute. The HTTP header attempts to set the character encoding to ISO 8859-15. The page contains an encoding declaration in a meta content attribute that attempts to set the character encoding to ISO 8859-1. The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector the-input-byte-stream-016 No encoding declaration A page with no encoding information in HTTP, BOM, XML declaration or meta element will be treated as UTF-8. The test on this page contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector the-input-byte-stream-015 UTF-8 BOM vs meta charset A page with a UTF-8 BOM will be recognized as UTF-8 even if the meta charset attribute declares a different encoding. The page contains an encoding declaration in a meta charset attribute that attempts to set the character encoding to ISO 8859-15, but the file starts with a UTF-8 signature. The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector the-input-byte-stream-038 UTF-8 BOM vs meta content A page with a UTF-8 BOM will be recognized as UTF-8 even if the meta content attribute declares a different encoding. The page contains an encoding declaration in a meta content attribute that attempts to set the character encoding to ISO 8859-15, but the file starts with a UTF-8 signature. The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector the-input-byte-stream-037 meta charset attribute The character encoding of the page can be set by a meta element with charset attribute. The only character encoding declaration for this HTML file is in the charset attribute of the meta element, which declares the encoding to be ISO 8859-15. The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector the-input-byte-stream-009 meta content attribute The character encoding of the page can be set by a meta element with http-equiv and content attributes. The only character encoding declaration for this HTML file is in the content attribute of the meta element, which declares the encoding to be ISO 8859-15. The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector the-input-byte-stream-007 Links:.test div.ÜÀÚ. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.
Result summary & related tests
Detailed results for this test
Link to spec.test div.ýäè. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.
Result summary & related tests
Detailed results for this test
Link to spec.test div.ÜÀÚ. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.
Result summary & related tests
Detailed results for this test
Link to spec.test div.ÜÀÚ. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.
Result summary & related tests
Detailed results for this test
Link to spec.test div.ýäè. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.
Result summary & related tests
Detailed results for this test
Link to spec.test div.ýäè. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.
Result summary & related tests
Detailed results for this test
Link to spec.test div.ýäè. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.
Result summary & related tests
Detailed results for this test
Link to spec.test div.ÜÀÚ. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.
Result summary & related tests
Detailed results for this test
Link to spec.test div.ÜÀÚ. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.
Result summary & related tests
Detailed results for this test
Link to spec