From fd1e7465b873f043750de72b56fac9e6546a3b10 Mon Sep 17 00:00:00 2001 From: Jim Myhrberg Date: Sun, 8 Oct 2017 15:27:35 +0100 Subject: [PATCH] Initial commit --- .gitignore | 5 + 404.html | 9 + CNAME | 1 + Gemfile | 21 ++ Gemfile.lock | 102 ++++++ README.md | 5 + Rakefile | 85 +++++ _assets/css/_base.scss | 90 +++++ _assets/css/_highlight.scss | 62 ++++ _assets/css/_side-menu.scss | 248 ++++++++++++++ _assets/css/main.scss | 3 + _assets/js/main.js | 1 + _assets/js/ui.js | 44 +++ _config.yml | 50 +++ _layouts/default.html | 54 +++ docs/.nojekyll | 0 docs/404.html | 60 ++++ docs/CNAME | 1 + ...4820f95eecdd416e60b99950f2058c61a2e99f.css | 1 + ...0cf8afe78afa6b7b6f6bdb5aff91ac0256c0a1a.js | 1 + docs/index.html | 308 +++++++++++++++++ docs/robots.txt | 1 + docs/sitemap.xml | 9 + docs/spec/0.9.0-draft.1.html | 308 +++++++++++++++++ index.md | 312 ++++++++++++++++++ spec/0.9.0-draft.1.md | 312 ++++++++++++++++++ 26 files changed, 2093 insertions(+) create mode 100644 .gitignore create mode 100644 404.html create mode 100644 CNAME create mode 100644 Gemfile create mode 100644 Gemfile.lock create mode 100644 README.md create mode 100644 Rakefile create mode 100644 _assets/css/_base.scss create mode 100644 _assets/css/_highlight.scss create mode 100644 _assets/css/_side-menu.scss create mode 100644 _assets/css/main.scss create mode 100644 _assets/js/main.js create mode 100644 _assets/js/ui.js create mode 100644 _config.yml create mode 100644 _layouts/default.html create mode 100644 docs/.nojekyll create mode 100644 docs/404.html create mode 100644 docs/CNAME create mode 100644 docs/assets/main-5df19fc13b2a391dcab974f0584820f95eecdd416e60b99950f2058c61a2e99f.css create mode 100644 docs/assets/main-870855580c69dec57be4c965d0cf8afe78afa6b7b6f6bdb5aff91ac0256c0a1a.js create mode 100644 docs/index.html create mode 100644 docs/robots.txt create mode 100644 docs/sitemap.xml create mode 100644 docs/spec/0.9.0-draft.1.html create mode 100644 index.md create mode 100644 
spec/0.9.0-draft.1.md diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..404d8b1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +_site +.asset-cache +.sass-cache +.jekyll-metadata +docs/assets/.sprockets-manifest-*.json diff --git a/404.html b/404.html new file mode 100644 index 0000000..e6b61db --- /dev/null +++ b/404.html @@ -0,0 +1,9 @@ +--- +title: 404 Page Not Found +sitemap: false +--- +
+

404

+

Page not found :(

+

The requested page could not be found.

+
diff --git a/CNAME b/CNAME new file mode 100644 index 0000000..71f60da --- /dev/null +++ b/CNAME @@ -0,0 +1 @@ +csv-spec.org diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..fefd6a7 --- /dev/null +++ b/Gemfile @@ -0,0 +1,21 @@ +source 'https://rubygems.org' + +gem 'jekyll', '3.5.0' + +group :development do + gem 'rake' + gem 'rubocop' +end + +# If you have any plugins, put them here! +group :jekyll_plugins do + gem 'jekyll-assets' + gem 'jekyll-pants' + gem 'jekyll-seo-tag' + gem 'jekyll-sitemap' + gem 'jekyll-tidy' + gem 'uglifier' # required by 'jekyll-assets' for JS compression +end + +# Windows does not include zoneinfo files, so bundle the tzinfo-data gem +gem 'tzinfo-data', platforms: [:mingw, :mswin, :x64_mingw, :jruby] diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 0000000..91ecdd9 --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,102 @@ +GEM + remote: https://rubygems.org/ + specs: + addressable (2.5.1) + public_suffix (~> 2.0, >= 2.0.2) + ast (2.3.0) + colorator (1.1.0) + concurrent-ruby (1.0.5) + execjs (2.7.0) + extras (0.3.0) + forwardable-extended (~> 2.5) + fastimage (2.1.0) + ffi (1.9.18) + forwardable-extended (2.6.0) + htmlbeautifier (1.3.1) + htmlcompressor (0.3.1) + jekyll (3.5.0) + addressable (~> 2.4) + colorator (~> 1.0) + jekyll-sass-converter (~> 1.0) + jekyll-watch (~> 1.1) + kramdown (~> 1.3) + liquid (~> 4.0) + mercenary (~> 0.3.3) + pathutil (~> 0.9) + rouge (~> 1.7) + safe_yaml (~> 1.0) + jekyll-assets (2.3.2) + concurrent-ruby (~> 1.0) + extras (~> 0.2) + fastimage (~> 2.0, >= 1.8) + jekyll (~> 3.1, >= 3.0) + pathutil (>= 0.8) + rack (~> 1.6) + sprockets (~> 3.3, < 3.8) + jekyll-pants (0.2.1) + rubypants + jekyll-sass-converter (1.5.0) + sass (~> 3.4) + jekyll-seo-tag (2.2.3) + jekyll (~> 3.3) + jekyll-sitemap (1.0.0) + jekyll (~> 3.3) + jekyll-tidy (0.2.2) + htmlbeautifier + htmlcompressor + jekyll + jekyll-watch (1.5.0) + listen (~> 3.0, < 3.1) + kramdown (1.14.0) + liquid (4.0.0) + listen (3.0.8) + 
rb-fsevent (~> 0.9, >= 0.9.4) + rb-inotify (~> 0.9, >= 0.9.7) + mercenary (0.3.6) + parser (2.4.0.0) + ast (~> 2.2) + pathutil (0.14.0) + forwardable-extended (~> 2.6) + powerpack (0.1.1) + public_suffix (2.0.5) + rack (1.6.8) + rainbow (2.2.1) + rake (12.0.0) + rb-fsevent (0.10.2) + rb-inotify (0.9.10) + ffi (>= 0.5.0, < 2) + rouge (1.11.1) + rubocop (0.47.1) + parser (>= 2.3.3.1, < 3.0) + powerpack (~> 0.1) + rainbow (>= 1.99.1, < 3.0) + ruby-progressbar (~> 1.7) + unicode-display_width (~> 1.0, >= 1.0.1) + ruby-progressbar (1.8.1) + rubypants (0.6.0) + safe_yaml (1.0.4) + sass (3.4.25) + sprockets (3.7.1) + concurrent-ruby (~> 1.0) + rack (> 1, < 3) + uglifier (3.2.0) + execjs (>= 0.3.0, < 3) + unicode-display_width (1.3.0) + +PLATFORMS + ruby + +DEPENDENCIES + jekyll (= 3.5.0) + jekyll-assets + jekyll-pants + jekyll-seo-tag + jekyll-sitemap + jekyll-tidy + rake + rubocop + tzinfo-data + uglifier + +BUNDLED WITH + 1.14.6 diff --git a/README.md b/README.md new file mode 100644 index 0000000..54685fc --- /dev/null +++ b/README.md @@ -0,0 +1,5 @@ +# csv-spec.org + +This is the site at http://csv-spec.org/ that attempts to describe CSV-like +formats in a obvious and easy to understand way, complete with code examples +aimed at developers. 
diff --git a/Rakefile b/Rakefile new file mode 100644 index 0000000..49efba5 --- /dev/null +++ b/Rakefile @@ -0,0 +1,85 @@ +require 'open-uri' +require 'yaml' + +desc 'Build site into docs directory' +task :build do + jekyll_build +end + +desc 'Update index.md and spec folder based on versions in _config.yml' +task :update do + config = YAML.load_file('_config.yml') + current_version = config['current_version'] + versions = config['versions'] + + remove_all_specs(config['update']) + + puts '' + puts 'Fetching configured spec versions:' + versions.each do |version| + spec = fetch_spec(version, config['update']) + + if current_version == version + write_file('index.md', spec[:body], " (#{version})") + end + + filename = File.join(config['update']['output_dir'], version) + write_file("#{filename}.md", spec[:body]) + write_file("#{filename}.svg", spec[:diagram]) if spec[:diagram] + end + + jekyll_build +end + +def jekyll_build + puts 'Rebuilding output into docs directory...' + exec 'jekyll build --destination docs && touch docs/.nojekyll' +end + +def write_file(file, content, comment = nil) + puts " - #{file}#{comment}" + File.write(file, content) +end + +def fetch_spec(version, config) + document = get(build_file_url('document', version, config)) + + if config['files']['diagram'] + diagram = get(build_file_url('diagram', version, config)) + img_tag = config['img_tpl'].gsub('{{file}}', "#{version}.svg") + document.gsub!(/\A(.*\n=+\n)/, "\\1\n#{img_tag}\n") + end + + title = document.split("\n", 2).first + body = config['body_tpl'].gsub('{{content}}', document) + .gsub('{{title}}', title) + .gsub('{{version}}', version) + + { + version: version, + title: title, + body: body, + diagram: diagram + } +end + +def build_file_url(file, version, config) + config['url_tpl'] + .gsub('{{version}}', version) + .gsub('{{file}}', config['files'][file]) +end + +def get(url) + URI.parse(url).read +rescue OpenURI::HTTPError + nil +end + +def remove_all_specs(config) + puts '' + puts 
'Removing existing spec files:' + Dir["#{config['output_dir']}/*"].each do |file| + puts " #{file.gsub(File.dirname(__FILE__), '')}" + File.delete(file) + end +end diff --git a/_assets/css/_base.scss b/_assets/css/_base.scss new file mode 100644 index 0000000..5cb3783 --- /dev/null +++ b/_assets/css/_base.scss @@ -0,0 +1,90 @@ +html { + height: 100%; +} + +body { + font-family: 'Open Sans', Helvetica, Arial, sans-serif; + font-size: 16px; + font-weight: 400; + line-height: 1.5; + color: #1a1a1a; + background-color: #fdfdfd; +} + +h1, h2, h3, h4, h5, h6 { + font-family: 'Open Sans Condensed', Helvetica, Arial, sans-serif; + font-weight: 700; + color: #333; +} + +h1 { + font-size: 2.5em; + line-height: 1.2; +} + +ol ol, ul ol { + list-style-type: lower-roman; +} + +ul ul ol, ul ol ol, ol ul ol, ol ol ol { + list-style-type: lower-alpha; +} + +.content { + margin-top: 80px; + + a { + word-break: break-word; + } + + code { + background-color: rgba(27,31,35,0.05); + border-radius: 3px; + font-family: "SFMono-Regular", Consolas, "Liberation Mono", Menlo, Courier, monospace; + font-size: 85%; + margin: 0; + padding: 0.3em 0.4em 0.1em 0.4em; + } + + pre { + background-color: #f6f8fa; + border-radius: 3px; + line-height: 1.45; + padding: 16px; + } + + pre > code { + background-color: transparent !important; + border-radius: none; + font-size: 90%; + padding: 0; + } +} + +#menu { + .pure-menu-label { + color: #999; + border: none; + padding: 0.6em 0 0.6em 0.6em; + } + + .links { + font-size: 50px; + position: absolute; + bottom: 10px; + left: 0px; + right: 0px; + text-align: center; + + a { + color: #555; + padding: 0; + position: relative; + text-decoration: none; + + &:hover { + color: #777; + } + } + } +} diff --git a/_assets/css/_highlight.scss b/_assets/css/_highlight.scss new file mode 100644 index 0000000..2192921 --- /dev/null +++ b/_assets/css/_highlight.scss @@ -0,0 +1,62 @@ +/* 
https://github.com/jwarby/jekyll-pygments-themes/blob/22dfd74f9f10c87c0ec98876e136f02c989d43ba/github.css */ +.highlight .hll { background-color: #ffffcc } +.highlight .c { color: #999988; font-style: italic } /* Comment */ +.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */ +.highlight .k { color: #000000; font-weight: bold } /* Keyword */ +.highlight .o { color: #000000; font-weight: bold } /* Operator */ +.highlight .cm { color: #999988; font-style: italic } /* Comment.Multiline */ +.highlight .cp { color: #999999; font-weight: bold; font-style: italic } /* Comment.Preproc */ +.highlight .c1 { color: #999988; font-style: italic } /* Comment.Single */ +.highlight .cs { color: #999999; font-weight: bold; font-style: italic } /* Comment.Special */ +.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */ +.highlight .ge { color: #000000; font-style: italic } /* Generic.Emph */ +.highlight .gr { color: #aa0000 } /* Generic.Error */ +.highlight .gh { color: #999999 } /* Generic.Heading */ +.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */ +.highlight .go { color: #888888 } /* Generic.Output */ +.highlight .gp { color: #555555 } /* Generic.Prompt */ +.highlight .gs { font-weight: bold } /* Generic.Strong */ +.highlight .gu { color: #aaaaaa } /* Generic.Subheading */ +.highlight .gt { color: #aa0000 } /* Generic.Traceback */ +.highlight .kc { color: #000000; font-weight: bold } /* Keyword.Constant */ +.highlight .kd { color: #000000; font-weight: bold } /* Keyword.Declaration */ +.highlight .kn { color: #000000; font-weight: bold } /* Keyword.Namespace */ +.highlight .kp { color: #000000; font-weight: bold } /* Keyword.Pseudo */ +.highlight .kr { color: #000000; font-weight: bold } /* Keyword.Reserved */ +.highlight .kt { color: #445588; font-weight: bold } /* Keyword.Type */ +.highlight .m { color: #009999 } /* Literal.Number */ +.highlight .s { color: #d01040 } /* Literal.String */ 
+.highlight .na { color: #008080 } /* Name.Attribute */ +.highlight .nb { color: #0086B3 } /* Name.Builtin */ +.highlight .nc { color: #445588; font-weight: bold } /* Name.Class */ +.highlight .no { color: #008080 } /* Name.Constant */ +.highlight .nd { color: #3c5d5d; font-weight: bold } /* Name.Decorator */ +.highlight .ni { color: #800080 } /* Name.Entity */ +.highlight .ne { color: #990000; font-weight: bold } /* Name.Exception */ +.highlight .nf { color: #990000; font-weight: bold } /* Name.Function */ +.highlight .nl { color: #990000; font-weight: bold } /* Name.Label */ +.highlight .nn { color: #555555 } /* Name.Namespace */ +.highlight .nt { color: #000080 } /* Name.Tag */ +.highlight .nv { color: #008080 } /* Name.Variable */ +.highlight .ow { color: #000000; font-weight: bold } /* Operator.Word */ +.highlight .w { color: #bbbbbb } /* Text.Whitespace */ +.highlight .mf { color: #009999 } /* Literal.Number.Float */ +.highlight .mh { color: #009999 } /* Literal.Number.Hex */ +.highlight .mi { color: #009999 } /* Literal.Number.Integer */ +.highlight .mo { color: #009999 } /* Literal.Number.Oct */ +.highlight .sb { color: #d01040 } /* Literal.String.Backtick */ +.highlight .sc { color: #d01040 } /* Literal.String.Char */ +.highlight .sd { color: #d01040 } /* Literal.String.Doc */ +.highlight .s2 { color: #d01040 } /* Literal.String.Double */ +.highlight .se { color: #d01040 } /* Literal.String.Escape */ +.highlight .sh { color: #d01040 } /* Literal.String.Heredoc */ +.highlight .si { color: #d01040 } /* Literal.String.Interpol */ +.highlight .sx { color: #d01040 } /* Literal.String.Other */ +.highlight .sr { color: #009926 } /* Literal.String.Regex */ +.highlight .s1 { color: #d01040 } /* Literal.String.Single */ +.highlight .ss { color: #990073 } /* Literal.String.Symbol */ +.highlight .bp { color: #999999 } /* Name.Builtin.Pseudo */ +.highlight .vc { color: #008080 } /* Name.Variable.Class */ +.highlight .vg { color: #008080 } /* Name.Variable.Global */ 
+.highlight .vi { color: #008080 } /* Name.Variable.Instance */ +.highlight .il { color: #009999 } /* Literal.Number.Integer.Long */ diff --git a/_assets/css/_side-menu.scss b/_assets/css/_side-menu.scss new file mode 100644 index 0000000..7abd61c --- /dev/null +++ b/_assets/css/_side-menu.scss @@ -0,0 +1,248 @@ +body { + color: #777; +} + +.pure-img-responsive { + max-width: 100%; + height: auto; +} + +/* +Add transition to containers so they can push in and out. +*/ +#layout, +#menu, +.menu-link { + -webkit-transition: all 0.2s ease-out; + -moz-transition: all 0.2s ease-out; + -ms-transition: all 0.2s ease-out; + -o-transition: all 0.2s ease-out; + transition: all 0.2s ease-out; +} + +/* +This is the parent `
` that contains the menu and the content area. +*/ +#layout { + position: relative; + left: 0; + padding-left: 0; +} + #layout.active #menu { + left: 150px; + width: 150px; + } + + #layout.active .menu-link { + left: 150px; + } +/* +The content `
` is where all your content goes. +*/ +.content { + margin: 0 auto; + padding: 0 2em; + max-width: 800px; + margin-bottom: 50px; + line-height: 1.6em; +} + +.header { + margin: 0; + color: #333; + text-align: center; + padding: 2.5em 2em 0; + border-bottom: 1px solid #eee; + } + .header h1 { + margin: 0.2em 0; + font-size: 3em; + font-weight: 300; + } + .header h2 { + font-weight: 300; + color: #ccc; + padding: 0; + margin-top: 0; + } + +.content-subhead { + margin: 50px 0 20px 0; + font-weight: 300; + color: #888; +} + + + +/* +The `#menu` `
` is the parent `
` that contains the `.pure-menu` that +appears on the left side of the page. +*/ + +#menu { + margin-left: -150px; /* "#menu" width */ + width: 150px; + position: fixed; + top: 0; + left: 0; + bottom: 0; + z-index: 1000; /* so the menu or its navicon stays above all content */ + background: #191818; + overflow-y: auto; + -webkit-overflow-scrolling: touch; +} + /* + All anchors inside the menu should be styled like this. + */ + #menu a { + color: #999; + border: none; + padding: 0.6em 0 0.6em 0.6em; + } + + /* + Remove all background/borders, since we are applying them to #menu. + */ + #menu .pure-menu, + #menu .pure-menu ul { + border: none; + background: transparent; + } + + /* + Add that light border to separate items into groups. + */ + #menu .pure-menu ul, + #menu .pure-menu .menu-item-divided { + border-top: 1px solid #333; + } + /* + Change color of the anchor links on hover/focus. + */ + #menu .pure-menu li a:hover, + #menu .pure-menu li a:focus { + background: #333; + } + + /* + This styles the selected menu item `
  • `. + */ + #menu .pure-menu-selected, + #menu .pure-menu-heading { + background: #1f8dd6; + } + /* + This styles a link within a selected menu item `
  • `. + */ + #menu .pure-menu-selected a { + color: #fff; + } + + /* + This styles the menu heading. + */ + #menu .pure-menu-heading { + font-size: 110%; + color: #fff; + margin: 0; + } + +/* -- Dynamic Button For Responsive Menu -------------------------------------*/ + +/* +The button to open/close the Menu is custom-made and not part of Pure. Here's +how it works: +*/ + +/* +`.menu-link` represents the responsive menu toggle that shows/hides on +small screens. +*/ +.menu-link { + position: fixed; + display: block; /* show this only on small screens */ + top: 0; + left: 0; /* "#menu width" */ + background: #000; + background: rgba(0,0,0,0.7); + font-size: 10px; /* change this value to increase/decrease button size */ + z-index: 10; + width: 2em; + height: auto; + padding: 2.1em 1.6em; +} + + .menu-link:hover, + .menu-link:focus { + background: #000; + } + + .menu-link span { + position: relative; + display: block; + } + + .menu-link span, + .menu-link span:before, + .menu-link span:after { + background-color: #fff; + width: 100%; + height: 0.2em; + } + + .menu-link span:before, + .menu-link span:after { + position: absolute; + margin-top: -0.6em; + content: " "; + } + + .menu-link span:after { + margin-top: 0.6em; + } + + +/* -- Responsive Styles (Media Queries) ------------------------------------- */ + +/* +Hides the menu at `48em`, but modify this based on your app's needs. +*/ +@media (min-width: 48em) { + + .header, + .content { + padding-left: 2em; + padding-right: 2em; + } + + #layout { + padding-left: 150px; /* left col width "#menu" */ + left: 0; + } + #menu { + left: 150px; + } + + .menu-link { + position: fixed; + left: 150px; + display: none; + } + + #layout.active .menu-link { + left: 150px; + } +} + +@media (max-width: 48em) { + /* Only apply this when the window is small. Otherwise, the following + case results in extra padding on the left: + * Make the window small. + * Tap the menu to trigger the active state. + * Make the window large again. 
+ */ + #layout.active { + position: relative; + left: 150px; + } +} diff --git a/_assets/css/main.scss b/_assets/css/main.scss new file mode 100644 index 0000000..6108420 --- /dev/null +++ b/_assets/css/main.scss @@ -0,0 +1,3 @@ +@import "side-menu"; +@import "highlight"; +@import "base"; diff --git a/_assets/js/main.js b/_assets/js/main.js new file mode 100644 index 0000000..afe5bb6 --- /dev/null +++ b/_assets/js/main.js @@ -0,0 +1 @@ +// = require ui diff --git a/_assets/js/ui.js b/_assets/js/ui.js new file mode 100644 index 0000000..4308341 --- /dev/null +++ b/_assets/js/ui.js @@ -0,0 +1,44 @@ +(function (window, document) { + var layout = document.getElementById('layout'); + var menu = document.getElementById('menu'); + var menuLink = document.getElementById('menuLink'); + var content = document.getElementById('main'); + + function toggleClass (element, className) { + var classes = element.className.split(/\s+/); + var length = classes.length; + var i = 0; + + for (; i < length; i++) { + if (classes[i] === className) { + classes.splice(i, 1); + break; + } + } + // The className is not found + if (length === classes.length) { + classes.push(className); + } + + element.className = classes.join(' '); + } + + function toggleAll (e) { + var active = 'active'; + + e.preventDefault(); + toggleClass(layout, active); + toggleClass(menu, active); + toggleClass(menuLink, active); + } + + menuLink.onclick = function (e) { + toggleAll(e); + }; + + content.onclick = function (e) { + if (menu.className.indexOf('active') !== -1) { + toggleAll(e); + } + }; +}(this, this.document)); diff --git a/_config.yml b/_config.yml new file mode 100644 index 0000000..8318ab0 --- /dev/null +++ b/_config.yml @@ -0,0 +1,50 @@ +title: CSV Spec +description: > + An attempt to describe CSV-like formats in a obvious and easy to understand + way, complete with code examples aimed at developers. 
+author: Jim Myhrberg +hostname: csv-spec.org +url: https://csv-spec.org +repo_url: https://github.com/parsecsv/csv-spec + +current_version: 0.9.0-draft.1 +versions: + - 0.9.0-draft.1 + +exclude: + - Gemfile + - Gemfile.lock + - Rakefile + - README.md + +update: + body_tpl: | + --- + title: {{title}} + version: {{version}} + --- + {{content}} + url_tpl: "https://github.com/parsecsv/csv-spec/raw/{{version}}/{{file}}" + output_dir: "spec" + files: + document: csv-spec.md + +plugins: + - jekyll-assets + - jekyll-pants + - jekyll-sitemap + - jekyll-seo-tag + - jekyll-tidy + +defaults: + - + scope: + path: "" + values: + layout: "default" + +assets: + digest: true + compress: + css: true + js: true diff --git a/_layouts/default.html b/_layouts/default.html new file mode 100644 index 0000000..9e047d6 --- /dev/null +++ b/_layouts/default.html @@ -0,0 +1,54 @@ + + + + + + + + + + {% css main %} + {% seo %} + + +
    + + + + + +
    +
    + {{ content }} +
    +
    +
    + {% js main %} + + diff --git a/docs/.nojekyll b/docs/.nojekyll new file mode 100644 index 0000000..e69de29 diff --git a/docs/404.html b/docs/404.html new file mode 100644 index 0000000..ddccd81 --- /dev/null +++ b/docs/404.html @@ -0,0 +1,60 @@ + + + + + + + + + + + + 404 Page Not Found | CSV Spec + + + + + + + + + + + + +
    + + + + +
    +
    +
    +

    404

    +

    Page not found :(

    +

    The requested page could not be found.

    +
    +
    +
    +
    + + + \ No newline at end of file diff --git a/docs/CNAME b/docs/CNAME new file mode 100644 index 0000000..71f60da --- /dev/null +++ b/docs/CNAME @@ -0,0 +1 @@ +csv-spec.org diff --git a/docs/assets/main-5df19fc13b2a391dcab974f0584820f95eecdd416e60b99950f2058c61a2e99f.css b/docs/assets/main-5df19fc13b2a391dcab974f0584820f95eecdd416e60b99950f2058c61a2e99f.css new file mode 100644 index 0000000..05248ba --- /dev/null +++ b/docs/assets/main-5df19fc13b2a391dcab974f0584820f95eecdd416e60b99950f2058c61a2e99f.css @@ -0,0 +1 @@ +body{color:#777}.pure-img-responsive{max-width:100%;height:auto}#layout,#menu,.menu-link{-webkit-transition:all 0.2s ease-out;-moz-transition:all 0.2s ease-out;-ms-transition:all 0.2s ease-out;-o-transition:all 0.2s ease-out;transition:all 0.2s ease-out}#layout{position:relative;left:0;padding-left:0}#layout.active #menu{left:150px;width:150px}#layout.active .menu-link{left:150px}.content{margin:0 auto;padding:0 2em;max-width:800px;margin-bottom:50px;line-height:1.6em}.header{margin:0;color:#333;text-align:center;padding:2.5em 2em 0;border-bottom:1px solid #eee}.header h1{margin:0.2em 0;font-size:3em;font-weight:300}.header h2{font-weight:300;color:#ccc;padding:0;margin-top:0}.content-subhead{margin:50px 0 20px 0;font-weight:300;color:#888}#menu{margin-left:-150px;width:150px;position:fixed;top:0;left:0;bottom:0;z-index:1000;background:#191818;overflow-y:auto;-webkit-overflow-scrolling:touch}#menu a{color:#999;border:none;padding:0.6em 0 0.6em 0.6em}#menu .pure-menu,#menu .pure-menu ul{border:none;background:transparent}#menu .pure-menu ul,#menu .pure-menu .menu-item-divided{border-top:1px solid #333}#menu .pure-menu li a:hover,#menu .pure-menu li a:focus{background:#333}#menu .pure-menu-selected,#menu .pure-menu-heading{background:#1f8dd6}#menu .pure-menu-selected a{color:#fff}#menu 
.pure-menu-heading{font-size:110%;color:#fff;margin:0}.menu-link{position:fixed;display:block;top:0;left:0;background:#000;background:rgba(0,0,0,0.7);font-size:10px;z-index:10;width:2em;height:auto;padding:2.1em 1.6em}.menu-link:hover,.menu-link:focus{background:#000}.menu-link span{position:relative;display:block}.menu-link span,.menu-link span:before,.menu-link span:after{background-color:#fff;width:100%;height:0.2em}.menu-link span:before,.menu-link span:after{position:absolute;margin-top:-0.6em;content:" "}.menu-link span:after{margin-top:0.6em}@media (min-width: 48em){.header,.content{padding-left:2em;padding-right:2em}#layout{padding-left:150px;left:0}#menu{left:150px}.menu-link{position:fixed;left:150px;display:none}#layout.active .menu-link{left:150px}}@media (max-width: 48em){#layout.active{position:relative;left:150px}}.highlight .hll{background-color:#ffffcc}.highlight .c{color:#999988;font-style:italic}.highlight .err{color:#a61717;background-color:#e3d2d2}.highlight .k{color:#000000;font-weight:bold}.highlight .o{color:#000000;font-weight:bold}.highlight .cm{color:#999988;font-style:italic}.highlight .cp{color:#999999;font-weight:bold;font-style:italic}.highlight .c1{color:#999988;font-style:italic}.highlight .cs{color:#999999;font-weight:bold;font-style:italic}.highlight .gd{color:#000000;background-color:#ffdddd}.highlight .ge{color:#000000;font-style:italic}.highlight .gr{color:#aa0000}.highlight .gh{color:#999999}.highlight .gi{color:#000000;background-color:#ddffdd}.highlight .go{color:#888888}.highlight .gp{color:#555555}.highlight .gs{font-weight:bold}.highlight .gu{color:#aaaaaa}.highlight .gt{color:#aa0000}.highlight .kc{color:#000000;font-weight:bold}.highlight .kd{color:#000000;font-weight:bold}.highlight .kn{color:#000000;font-weight:bold}.highlight .kp{color:#000000;font-weight:bold}.highlight .kr{color:#000000;font-weight:bold}.highlight .kt{color:#445588;font-weight:bold}.highlight .m{color:#009999}.highlight .s{color:#d01040}.highlight 
.na{color:#008080}.highlight .nb{color:#0086B3}.highlight .nc{color:#445588;font-weight:bold}.highlight .no{color:#008080}.highlight .nd{color:#3c5d5d;font-weight:bold}.highlight .ni{color:#800080}.highlight .ne{color:#990000;font-weight:bold}.highlight .nf{color:#990000;font-weight:bold}.highlight .nl{color:#990000;font-weight:bold}.highlight .nn{color:#555555}.highlight .nt{color:#000080}.highlight .nv{color:#008080}.highlight .ow{color:#000000;font-weight:bold}.highlight .w{color:#bbbbbb}.highlight .mf{color:#009999}.highlight .mh{color:#009999}.highlight .mi{color:#009999}.highlight .mo{color:#009999}.highlight .sb{color:#d01040}.highlight .sc{color:#d01040}.highlight .sd{color:#d01040}.highlight .s2{color:#d01040}.highlight .se{color:#d01040}.highlight .sh{color:#d01040}.highlight .si{color:#d01040}.highlight .sx{color:#d01040}.highlight .sr{color:#009926}.highlight .s1{color:#d01040}.highlight .ss{color:#990073}.highlight .bp{color:#999999}.highlight .vc{color:#008080}.highlight .vg{color:#008080}.highlight .vi{color:#008080}.highlight .il{color:#009999}html{height:100%}body{font-family:'Open Sans', Helvetica, Arial, sans-serif;font-size:16px;font-weight:400;line-height:1.5;color:#1a1a1a;background-color:#fdfdfd}h1,h2,h3,h4,h5,h6{font-family:'Open Sans Condensed', Helvetica, Arial, sans-serif;font-weight:700;color:#333}h1{font-size:2.5em;line-height:1.2}ol ol,ul ol{list-style-type:lower-roman}ul ul ol,ul ol ol,ol ul ol,ol ol ol{list-style-type:lower-alpha}.content{margin-top:80px}.content a{word-break:break-word}.content code{background-color:rgba(27,31,35,0.05);border-radius:3px;font-family:"SFMono-Regular", Consolas, "Liberation Mono", Menlo, Courier, monospace;font-size:85%;margin:0;padding:0.3em 0.4em 0.1em 0.4em}.content pre{background-color:#f6f8fa;border-radius:3px;line-height:1.45;padding:16px}.content pre>code{background-color:transparent !important;border-radius:none;font-size:90%;padding:0}#menu .pure-menu-label{color:#999;border:none;padding:0.6em 
0 0.6em 0.6em}#menu .links{font-size:50px;position:absolute;bottom:10px;left:0px;right:0px;text-align:center}#menu .links a{color:#555;padding:0;position:relative;text-decoration:none}#menu .links a:hover{color:#777} diff --git a/docs/assets/main-870855580c69dec57be4c965d0cf8afe78afa6b7b6f6bdb5aff91ac0256c0a1a.js b/docs/assets/main-870855580c69dec57be4c965d0cf8afe78afa6b7b6f6bdb5aff91ac0256c0a1a.js new file mode 100644 index 0000000..fa63c7a --- /dev/null +++ b/docs/assets/main-870855580c69dec57be4c965d0cf8afe78afa6b7b6f6bdb5aff91ac0256c0a1a.js @@ -0,0 +1 @@ +!function(e,n){function t(e,n){for(var t=e.className.split(/\s+/),i=t.length,c=0;c + + + + + + + + + + + CSV Spec 0.9.0-draft.1 | CSV Spec + + + + + + + + + + + + +
    + + + + +
    +
    +

    CSV Spec 0.9.0-draft.1

    +

    Summary

    +

    CSV is not a file format, it is a loose set of guidelines of how to structure + tabular data into a plain text string. As such there’s an endless amount of + *.csv files floating around which are highly incompatible with each other. The + closest thing there is to a specification is RFC + 4180.

    +

    Goals

    +

    This project is an attempt to summarize RFC 4180 and the information in the + Comma-separated values + (CSV) Wikipedia article + into an easy to understand format. The spec will also take into account that the + comma (,) character is not the only character used as a field + delimiter. Semi-colons (;), tabs (\t), and more are popular field delimiter + characters. As such the specification will more accurately be describing a + CSV-like structured data format.

    +

    We will also provide input/output test files that CSV parser/writer software + libraries can use to validate if they properly adhere to the rules laid out in + this specification. And if possible we will even try to provide code snippets in + various languages that attempt to automatically determine the delimiter + character used in any given input CSV-like formatted file/data.

    +

    Roadmap

    +
      +
    1. Write up core specification rules. [in-progress]
    2. +
    3. Create input/output test files covering all rules in the specification.
    4. +
    5. Create website for csv-spec.org.
    6. +
    7. Create linting tool as an NPM module, allowing easy validation of CSV data + both client-side in a web browser, and server side via a command line tool.
    8. +
    9. Create automatic delimiter character detection code snippets in various + programming languages which CSV parser developers can freely use to enhance + their libraries.
    10. +
    +

    Terminology

    +
      +
    • Field — A singular String value within a record.
    • +
    • Record (or Row) — A collection of fields. This is often referred to as + a “line”, but a single record can span multiple text lines if a field within + it contains one or more line breaks.
    • +
    • Delimiter — The character used to separate fields within a row. Commonly + this will be a comma (,), but semi-colons (;) or tabs (\t) are two other + popular delimiter characters.
    • +
    • Header — The first row is often used to contain the column names for all + remaining rows. Header names would be used as key names when CSV data is + converted to JSON for example.
    • +
    • Line Break — Line breaks in CSV files can be CRLF (\r\n), LF (\n), and + even in rare cases CR (\r).
    • +
    • LF, CR, and CRLF — Different types of line breaks, typically determined by + the OS. Linux, OSX, and other *NIX operating systems generally use a line feed + (LF or \n) character. Windows uses a carriage return (CR or \r) and a line + feed character, effectively “CRLF” (\r\n).
    • +
    +

    CSV Format Specification

    +

    The key words “MUST”, “MUST NOT”, “REQUIRED”, “SHALL”, “SHALL NOT”, “SHOULD”, + “SHOULD NOT”, “RECOMMENDED”, “MAY”, and “OPTIONAL” in this document are to be + interpreted as described in RFC 2119.

    +

    These rules are mostly based on the corresponding section from RFC + 4180, with minor changes, + clarifications and improved examples.

    +
      +
    1. +

      Each record starts at the beginning of its own line, and ends with a line + break (shown as ¬).

      +

      CSV:

      +
      aaa,bbb,ccc¬
      +xxx,yyy,zzz¬
      +
      +

      JSON:

      +
      +
      [ ["aaa", "bbb", "ccc"],
      +  ["xxx", "yyy", "zzz"] ]
      +
      +
      +
    2. +
    3. +

      Though it is RECOMMENDED, the last record in a file is not required to have an + ending line break.

      +

      CSV:

      +
      aaa,bbb,ccc¬
      +xxx,yyy,zzz
      +
      +

      JSON:

      +
      +
      [ ["aaa", "bbb", "ccc"],
      +  ["xxx", "yyy", "zzz"] ]
      +
      +
      +
    4. +
    5. +

      There may be an OPTIONAL header line appearing as the first line of the file + with the same format as normal records. This header will contain names + corresponding to the fields in the file, and MUST contain the same number of + fields as the records in the rest of the file.

      +

      CSV:

      +
      field_1,field_2,field_3¬
      +aaa,bbb,ccc¬
      +xxx,yyy,zzz¬
      +
      +

      JSON (ignoring headers):

      +
      +
      [ ["field_1", "field_2", "field_3"],
      +  ["aaa", "bbb", "ccc"],
      +  ["xxx", "yyy", "zzz"] ]
      +
      +
      +

      JSON (using headers):

      +
      +
      [ {"field_1": "aaa", "field_2": "bbb", "field_3": "ccc"},
      +  {"field_1": "xxx", "field_2": "yyy", "field_3": "zzz"} ]
      +
      +
      +
    6. +
    7. +

      Within each record and the OPTIONAL header, there may be one or more fields, + separated by a delimiter (normally a comma). Each record MUST contain the + same number of fields throughout the file.

      +

      CSV (invalid):

      +
      aaa,bbb,ccc¬
      +111,222,333,444¬
      +xxx,yyy,zzz¬
      +
      +
    8. +
    9. +

      The last field in a record MUST NOT be followed by a comma. This results in an + additional field with nothing in it.

      +

      CSV:

      +
      aaa,bbb,ccc,¬
      +xxx,yyy,zzz,¬
      +
      +

      JSON:

      +
      +
      [ ["aaa", "bbb", "ccc", ""],
      +  ["xxx", "yyy", "zzz", ""] ]
      +
      +
      +
    10. +
    11. +

      Spaces are considered part of a field and MUST NOT be ignored.

      +

      CSV:

      +
      aaa ,  bbb , ccc¬
      + xxx, yyy  ,zzz ¬
      +
      +

      JSON:

      +
      +
      [ ["aaa ", "  bbb ", " ccc"],
      +  [" xxx", " yyy  ", "zzz "] ]
      +
      +
      +
    12. +
    13. +

      Fields containing line breaks (CRLF, LF, or CR), double quotes, or the + delimiter character (normally a comma) MUST be enclosed in double-quotes.

      +

      CSV:

      +
      aaa,"b¬
      +bb",ccc¬
      +xxx,"y, yy",zzz¬
      +
      +

      JSON:

      +
      +
      [ ["aaa", "b\r\nbb", "ccc"],
      +  ["xxx", "y, yy", "zzz"] ]
      +
      +
      +
    14. +
    15. +

      A double-quote appearing inside a field MUST be escaped by preceding it with + another double quote, and the field itself MUST be enclosed in double quotes.

      +

      CSV:

      +
      aaa,"b""bb",ccc¬
      +
      +

      JSON:

      +
      +
      [ ["aaa", "b\"bb", "ccc"] ]
      +
      +
      +
    16. +
    17. +

      When a field enclosed in double quotes has spaces before and/or after the + double quotes, the spaces MUST be ignored, as the field starts and ends with + the double quotes. However this is considered invalid formatting and the CSV + parser SHOULD report some form of warning message.

      +

      CSV:

      +
      aaa,bbb,ccc¬
      +xxx,  "y, yy" ,zzz¬
      +
      +

      JSON:

      +
      +
      [ ["aaa", "bbb", "ccc"],
      +  ["xxx", "y, yy", "zzz"] ]
      +
      +
      +
    18. +
    19. +

      It is possible to enclose every field in double quotes even if they don’t + need to be enclosed. However, it is RECOMMENDED to only enclose fields in + double quotes that require it.

      +

      CSV:

      +
      "aaa","bbb","ccc"¬
      +"xxx",yyy,zzz¬
      +
      +

      JSON:

      +
      +
      [ ["aaa", "bbb", "ccc"],
      +  ["xxx", "yyy", "zzz"] ]
      +
      +
      +
    20. +
    21. +

      All fields are always strings. CSV itself does not support type casting to + integers, floats, booleans, or anything else. It is not a CSV library’s + responsibility to type cast input CSV data.

      +

      If type casting is required, it is up to the developer using a specific CSV + library to ensure types are correctly dealt with.

      +

      Input JSON:

      +
      +
      [ [10, true, 0.3, null, "aaa"],
      +  [11, false, 2.13, "", "bbb"] ]
      +
      +
      +

      Output CSV:

      +
      10,true,0.3,,aaa¬
      +11,false,2.13,,bbb¬
      +
      +

      Output CSV parsed back to JSON:

      +
      +
      [ ["10", "true", "0.3", "", "aaa"],
      +  ["11", "false", "2.13", "", "bbb"] ]
      +
      +
      +

      At this point it is up to the developer themselves to type cast the above + output data from the CSV parser.

      +
    22. +
    23. However, when rendering type cast input data to CSV text, non-string types + MUST be converted to a string in such a way that minimal information is + lost. +
        +
      • Integers and floats MUST be rendered as a string version of themselves.
      • +
      • Booleans true and false MUST be rendered as true and false + strings, not as 1 or 0 numbers. If numbers are used the resulting + CSV data is indistinguishable from actual integer numbers.
      • +
      • Null/nil values MUST be rendered as empty strings.
      • +
      +
    24. +
    25. When parsing input CSV data all forms of line breaks (CRLF, LF, and CR) MUST + be supported.
    26. +
    27. When rendering output CSV data, CRLF MUST be used for line breaks to ensure + maximum cross-platform compatibility.
    28. +
    +

    About

    +

    This CSV specification is authored by Jim Myhrberg.

    +

    If you’d like to leave feedback, + please open an issue on GitHub.

    +

    License

    +

    CC0 1.0 Universal

    +
    +
    +
    + + + \ No newline at end of file diff --git a/docs/robots.txt b/docs/robots.txt new file mode 100644 index 0000000..e4a67c4 --- /dev/null +++ b/docs/robots.txt @@ -0,0 +1 @@ +Sitemap: https://csv-spec.org/sitemap.xml \ No newline at end of file diff --git a/docs/sitemap.xml b/docs/sitemap.xml new file mode 100644 index 0000000..1141125 --- /dev/null +++ b/docs/sitemap.xml @@ -0,0 +1,9 @@ + + + + https://csv-spec.org/spec/0.9.0-draft.1.html + + + https://csv-spec.org/ + + \ No newline at end of file diff --git a/docs/spec/0.9.0-draft.1.html b/docs/spec/0.9.0-draft.1.html new file mode 100644 index 0000000..e854fe1 --- /dev/null +++ b/docs/spec/0.9.0-draft.1.html @@ -0,0 +1,308 @@ + + + + + + + + + + + + CSV Spec 0.9.0-draft.1 | CSV Spec + + + + + + + + + + + + +
    + + + + +
    +
    +

    CSV Spec 0.9.0-draft.1

    +

    Summary

    +

    CSV is not a file format, it is a loose set of guidelines of how to structure + tabular data into a plain text string. As such there’s an endless amount of + *.csv files floating around which are highly incompatible with each other. The + closest thing there is to a specification is RFC + 4180.

    +

    Goals

    +

    This project is an attempt to summarize RFC 4180 and the information in the + Comma-separated values + (CSV) Wikipedia article + into an easy-to-understand format. The spec will also take into account that the + comma (,) character is not the only character used as a field + delimiter. Semi-colons (;), tabs (\t), and more are popular field delimiter + characters. As such the specification will more accurately be describing a + CSV-like structured data format.

    +

    We will also provide input/output test files that CSV parser/writer software + libraries can use to validate if they properly adhere to the rules laid out in + this specification. And if possible we will even try to provide code snippets in + various languages that attempt to automatically determine the delimiter + character used in any given input CSV-like formatted file/data.

    +

    Roadmap

    +
      +
    1. Write up core specification rules. [in-progress]
    2. +
    3. Create input/output test files covering all rules in the specification.
    4. +
    5. Create website for csv-spec.org.
    6. +
    7. Create linting tool as an NPM module, allowing easy validation of CSV data + both client-side in a web browser, and server-side via a command line tool.
    8. +
    9. Create automatic delimiter character detection code snippets in various + programming languages which CSV parser developers can freely use to enhance + their libraries.
    10. +
    +

    Terminology

    +
      +
    • Field — A singular String value within a record.
    • +
    • Record (or Row) — A collection of fields. This is often referred to as + a “line”, but a single record can span multiple text lines if a field within + it contains one or more line breaks.
    • +
    • Delimiter — The character used to separate fields within a row. Commonly + this will be a comma (,), but semi-colons (;) or tabs (\t) are two other + popular delimiter characters.
    • +
    • Header — The first row is often used to contain the column names for all + remaining rows. Header names would be used as key names when CSV data is + converted to JSON for example.
    • +
    • Line Break — Line breaks in CSV files can be CRLF (\r\n), LF (\n), and + even in rare cases CR (\r).
    • +
    • LF, CR, and CRLF — Different types of line breaks, typically determined by + the OS. Linux, OSX, and other *NIX operating systems generally use a line feed + (LF or \n) character. Windows uses a carriage return (CR or \r) and a line + feed character, effectively “CRLF” (\r\n).
    • +
    +

    CSV Format Specification

    +

    The key words “MUST”, “MUST NOT”, “REQUIRED”, “SHALL”, “SHALL NOT”, “SHOULD”, + “SHOULD NOT”, “RECOMMENDED”, “MAY”, and “OPTIONAL” in this document are to be + interpreted as described in RFC 2119.

    +

    These rules are mostly based on the corresponding section from RFC + 4180, with minor changes, + clarifications and improved examples.

    +
      +
    1. +

      Each record starts at the beginning of its own line, and ends with a line + break (shown as ¬).

      +

      CSV:

      +
      aaa,bbb,ccc¬
      +xxx,yyy,zzz¬
      +
      +

      JSON:

      +
      +
      [ ["aaa", "bbb", "ccc"],
      +  ["xxx", "yyy", "zzz"] ]
      +
      +
      +
    2. +
    3. +

      Though it is RECOMMENDED, the last record in a file is not required to have an + ending line break.

      +

      CSV:

      +
      aaa,bbb,ccc¬
      +xxx,yyy,zzz
      +
      +

      JSON:

      +
      +
      [ ["aaa", "bbb", "ccc"],
      +  ["xxx", "yyy", "zzz"] ]
      +
      +
      +
    4. +
    5. +

      There may be an OPTIONAL header line appearing as the first line of the file + with the same format as normal records. This header will contain names + corresponding to the fields in the file, and MUST contain the same number of + fields as the records in the rest of the file.

      +

      CSV:

      +
      field_1,field_2,field_3¬
      +aaa,bbb,ccc¬
      +xxx,yyy,zzz¬
      +
      +

      JSON (ignoring headers):

      +
      +
      [ ["field_1", "field_2", "field_3"],
      +  ["aaa", "bbb", "ccc"],
      +  ["xxx", "yyy", "zzz"] ]
      +
      +
      +

      JSON (using headers):

      +
      +
      [ {"field_1": "aaa", "field_2": "bbb", "field_3": "ccc"},
      +  {"field_1": "xxx", "field_2": "yyy", "field_3": "zzz"} ]
      +
      +
      +
    6. +
    7. +

      Within each record and the OPTIONAL header, there may be one or more fields, + separated by a delimiter (normally a comma). Each record MUST contain the + same number of fields throughout the file.

      +

      CSV (invalid):

      +
      aaa,bbb,ccc¬
      +111,222,333,444¬
      +xxx,yyy,zzz¬
      +
      +
    8. +
    9. +

      The last field in a record MUST NOT be followed by a comma. This results in an + additional field with nothing in it.

      +

      CSV:

      +
      aaa,bbb,ccc,¬
      +xxx,yyy,zzz,¬
      +
      +

      JSON:

      +
      +
      [ ["aaa", "bbb", "ccc", ""],
      +  ["xxx", "yyy", "zzz", ""] ]
      +
      +
      +
    10. +
    11. +

      Spaces are considered part of a field and MUST NOT be ignored.

      +

      CSV:

      +
      aaa ,  bbb , ccc¬
      + xxx, yyy  ,zzz ¬
      +
      +

      JSON:

      +
      +
      [ ["aaa ", "  bbb ", " ccc"],
      +  [" xxx", " yyy  ", "zzz "] ]
      +
      +
      +
    12. +
    13. +

      Fields containing line breaks (CRLF, LF, or CR), double quotes, or the + delimiter character (normally a comma) MUST be enclosed in double-quotes.

      +

      CSV:

      +
      aaa,"b¬
      +bb",ccc¬
      +xxx,"y, yy",zzz¬
      +
      +

      JSON:

      +
      +
      [ ["aaa", "b\r\nbb", "ccc"],
      +  ["xxx", "y, yy", "zzz"] ]
      +
      +
      +
    14. +
    15. +

      A double-quote appearing inside a field MUST be escaped by preceding it with + another double quote, and the field itself MUST be enclosed in double quotes.

      +

      CSV:

      +
      aaa,"b""bb",ccc¬
      +
      +

      JSON:

      +
      +
      [ ["aaa", "b\"bb", "ccc"] ]
      +
      +
      +
    16. +
    17. +

      When a field enclosed in double quotes has spaces before and/or after the + double quotes, the spaces MUST be ignored, as the field starts and ends with + the double quotes. However this is considered invalid formatting and the CSV + parser SHOULD report some form of warning message.

      +

      CSV:

      +
      aaa,bbb,ccc¬
      +xxx,  "y, yy" ,zzz¬
      +
      +

      JSON:

      +
      +
      [ ["aaa", "bbb", "ccc"],
      +  ["xxx", "y, yy", "zzz"] ]
      +
      +
      +
    18. +
    19. +

      It is possible to enclose every field in double quotes even if they don’t + need to be enclosed. However, it is RECOMMENDED to only enclose fields in + double quotes that require it.

      +

      CSV:

      +
      "aaa","bbb","ccc"¬
      +"xxx",yyy,zzz¬
      +
      +

      JSON:

      +
      +
      [ ["aaa", "bbb", "ccc"],
      +  ["xxx", "yyy", "zzz"] ]
      +
      +
      +
    20. +
    21. +

      All fields are always strings. CSV itself does not support type casting to + integers, floats, booleans, or anything else. It is not a CSV library’s + responsibility to type cast input CSV data.

      +

      If type casting is required, it is up to the developer using a specific CSV + library to ensure types are correctly dealt with.

      +

      Input JSON:

      +
      +
      [ [10, true, 0.3, null, "aaa"],
      +  [11, false, 2.13, "", "bbb"] ]
      +
      +
      +

      Output CSV:

      +
      10,true,0.3,,aaa¬
      +11,false,2.13,,bbb¬
      +
      +

      Output CSV parsed back to JSON:

      +
      +
      [ ["10", "true", "0.3", "", "aaa"],
      +  ["11", "false", "2.13", "", "bbb"] ]
      +
      +
      +

      At this point it is up to the developer themselves to type cast the above + output data from the CSV parser.

      +
    22. +
    23. However, when rendering type cast input data to CSV text, non-string types + MUST be converted to a string in such a way that minimal information is + lost. +
        +
      • Integers and floats MUST be rendered as a string version of themselves.
      • +
      • Booleans true and false MUST be rendered as true and false + strings, not as 1 or 0 numbers. If numbers are used the resulting + CSV data is indistinguishable from actual integer numbers.
      • +
      • Null/nil values MUST be rendered as empty strings.
      • +
      +
    24. +
    25. When parsing input CSV data all forms of line breaks (CRLF, LF, and CR) MUST + be supported.
    26. +
    27. When rendering output CSV data, CRLF MUST be used for line breaks to ensure + maximum cross-platform compatibility.
    28. +
    +

    About

    +

    This CSV specification is authored by Jim Myhrberg.

    +

    If you’d like to leave feedback, + please open an issue on GitHub.

    +

    License

    +

    CC0 1.0 Universal

    +
    +
    +
    + + + \ No newline at end of file diff --git a/index.md b/index.md new file mode 100644 index 0000000..e2a314d --- /dev/null +++ b/index.md @@ -0,0 +1,312 @@ +--- +title: CSV Spec 0.9.0-draft.1 +version: 0.9.0-draft.1 +--- +CSV Spec 0.9.0-draft.1 +==================== + +Summary +------- + +CSV is not a file format, it is a loose set of guidelines of how to structure +tabular data into a plain text string. As such there's an endless amount of +`*.csv` files floating around which are highly incompatible with each other. The +closest thing there is to a specification is [RFC +4180](http://tools.ietf.org/html/rfc4180). + +Goals +----- + +This project is an attempt to summarize RFC 4180 and the information in the +[Comma-separated values +(CSV)](http://en.wikipedia.org/wiki/Comma-separated_values) Wikipedia article +into a easy to understand format. The spec will also take into account that the +comma (`,`) character is not the only character used as a field +delimiter. Semi-colons (`;`), tabs (`\t`), and more are popular field delimiter +characters. As such the specification will more accurately be describing a +CSV-like structured data format. + +We will also provide input/output test files that CSV parser/writer software +libraries can use to validate if they properly adhere to the rules laid out in +this specification. And if possible we will even try to provide code snippets in +various languages that attempts to automatically determine the delimiter +character used in any given input CSV-like formatted file/data. + +Roadmap +------- + +1. Write up core specification rules. _[in-progress]_ +2. Create input/output test files covering all rules in the specification. +3. Create website for [csv-spec.org](http://csv-spec.org/). +4. Create linting tool as a NPM module, allowing easy validation of CSV data + both client-side in a web browser, and server side via a command line tool. +5. 
Create automatic delimiter character detection code snippets in various + programming languages which CSV parser developers can freely use to enhance + their libraries. + +Terminology +----------- + +- **Field** — A singular String value within a record. +- **Record** (or **Row**) — A collection of fields. This is often referred to as + a "line", but a single record can span multiple text lines if a field within + it contains one or more line breaks. +- **Delimiter** — The character used to separate fields withing a row. Commonly + this will be a comma (`,`), but semi-colons (`;`) or tabs (`\t`) are two other + popular delimiter characters. +- **Header** — The first row is often used to contain the column names for all + remaining rows. Header names would be used as key names when CSV data is + converted to JSON for example. +- **Line Break** — Line breaks in CSV files can be CRLF (`\r\n`), LF (`\n`), and + even in rare cases CR (`\r`). +- **LF, CR, and CRLF** — Different types of line breaks, typically determined by + the OS. Linux, OSX, and other *NIX operating systems generally use a line feed + (LF or `\n`) character. Windows uses a carriage return (CR or `\r`) and a line + feed character, effectively "CRLF" (`\r\n`). + +CSV Format Specification +------------------------ + +The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", +"SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be +interpreted as described in [RFC 2119](https://tools.ietf.org/html/rfc2119). + +These rules are mostly based on the corresponding section from [RFC +4180](http://tools.ietf.org/html/rfc4180#section-2), with minor changes, +clarifications and improved examples. + +1. Each record starts at the beginning of its own line, and ends with a line + break (shown as `¬`). + + CSV: + + ```csv + aaa,bbb,ccc¬ + xxx,yyy,zzz¬ + ``` + + JSON: + + ```json + [ ["aaa", "bbb", "ccc"], + ["xxx", "yyy", "zzz"] ] + ``` + +2. 
Though it is RECOMMENDED, the last record in a file is not required to have a + ending line break. + + CSV: + + ```csv + aaa,bbb,ccc¬ + xxx,yyy,zzz + ``` + + JSON: + + ```json + [ ["aaa", "bbb", "ccc"], + ["xxx", "yyy", "zzz"] ] + ``` + +3. There may be an OPTIONAL header line appearing as the first line of the file + with the same format as normal records. This header will contain names + corresponding to the fields in the file, and MUST contain the same number of + fields as the records in the rest of the file. + + CSV: + + ```csv + field_1,field_2,field_3¬ + aaa,bbb,ccc¬ + xxx,yyy,zzz¬ + ``` + + JSON (ignoring headers): + + ```json + [ ["field_1", "field_2", "field_3"], + ["aaa", "bbb", "ccc"], + ["xxx", "yyy", "zzz"] ] + ``` + + JSON (using headers): + + ```json + [ {"field_1": "aaa", "field_2": "bbb", "field_3": "ccc"}, + {"field_1": "xxx", "field_2": "yyy", "field_3": "zzz"} ] + ``` + +4. Within each record and the OPTIONAL header, there may be one or more fields, + separated by a delimiter (normally a comma). Each record MUST contain the + same number of fields throughout the file. + + CSV (invalid): + + ```csv + aaa,bbb,ccc¬ + 111,222,333,444¬ + xxx,yyy,zzz¬ + ``` + +5. The last field in a record MUST NOT be followed by a comma. This results in a + additional field with nothing in it. + + CSV: + + ```csv + aaa,bbb,ccc,¬ + xxx,yyy,zzz,¬ + ``` + + JSON: + + ```json + [ ["aaa", "bbb", "ccc", ""], + ["xxx", "yyy", "zzz", ""] ] + ``` + +6. Spaces are considered part of a field and MUST NOT be ignored. + + CSV: + + ```csv + aaa , bbb , ccc¬ + xxx, yyy ,zzz ¬ + ``` + + JSON: + + ```json + [ ["aaa ", " bbb ", " ccc"], + [" xxx", " yyy ", "zzz "] ] + ``` + +7. Fields containing line breaks (CRLF, LF, or CR), double quotes, or the + delimiter character (normally a comma) MUST be enclosed in double-quotes. + + CSV: + + ```csv + aaa,"b¬ + bb",ccc¬ + xxx,"y, yy",zzz¬ + ``` + + JSON: + + ```json + [ ["aaa", "b\r\nbb", "ccc"], + ["xxx", "y, yy", "zzz"] ] + ``` + +8. 
A double-quote appearing inside a field MUST be escaped by preceding it with + another double quote, and the field itself MUST be enclosed in double quotes. + + CSV: + + ```csv + aaa,"b""bb",ccc¬ + ``` + + JSON: + + ```json + [ ["aaa", "b\"bb", "ccc"] ] + ``` + +9. When a field enclosed in double quotes has spaces before and/or after the + double quotes, the spaces MUST be ignored, as the field starts and ends with + the double quotes. However this is considered invalid formatting and the CSV + parser SHOULD report some form of warning message. + + CSV: + + ```csv + aaa,bbb,ccc¬ + xxx, "y, yy" ,zzz¬ + ``` + + JSON: + + ```json + [ ["aaa", "bbb", "ccc"], + ["xxx", "y, yy", "zzz"] ] + ``` + +10. It is possible to enclose every field in double quotes even if they don't + need to be enclosed. However it is RECOMMENDED to only enclose fields in + double quotes that requires it. + + CSV: + + ```csv + "aaa","bbb","ccc"¬ + "xxx",yyy,zzz¬ + ``` + + JSON: + + ```json + [ ["aaa", "bbb", "ccc"], + ["xxx", "yyy", "zzz"] ] + ``` + +11. All fields are always strings. CSV itself does not support type casting to + integers, floats, booleans, or anything else. It is not a CSV library's + responsibility to type cast input CSV data. + + If type casting is required, it is up to the developer using a specific CSV + library to ensure types are correctly dealt with. + + Input JSON: + + ```json + [ [10, true, 0.3, null, "aaa"], + [11, false, 2.13, "", "bbb"] ] + ``` + + Output CSV: + + ```csv + 10,true,0.3,,aaa¬ + 11,false,2.13,,bbb¬ + ``` + + Output CSV parsed back to JSON: + + ```json + [ ["10", "true", "0.3", "", "aaa"], + ["11", "false", "2.13", "", "bbb"] ] + ``` + + At this point it is up to the developer themselves to type cast the above + output data from the CSV parser. + +12. However, when rendering type cast input data to CSV text, non-string types + MUST be converted to a string in such a way that minimal information is + lost. 
+ - Integers and floats MUST be rendered as a string version of themselves. + - Booleans `true` and `false` MUST be rendered as `true` and `false` + strings, not as `1` or `0` numbers. If numbers are used the resulting + CSV data is indistinguishable from actual integer numbers. + - `Null`/`nil` values MUST be rendered as empty strings. + +13. When parsing input CSV data all forms of line breaks (CRLF, LF, and CR) MUST + be supported. +14. When rendering output CSV data, CRLF MUST be used for line breaks to ensure + maximum cross-platform compatibility. + +About +----- + +This CSV specification is authored by [Jim Myhrberg](https://jimeh.me/). + +If you'd like to leave feedback, +please [open an issue on GitHub](https://github.com/parsecsv/csv-spec/issues). + +License +------- + +[CC0 1.0 Universal](http://creativecommons.org/publicdomain/zero/1.0/) + diff --git a/spec/0.9.0-draft.1.md b/spec/0.9.0-draft.1.md new file mode 100644 index 0000000..e2a314d --- /dev/null +++ b/spec/0.9.0-draft.1.md @@ -0,0 +1,312 @@ +--- +title: CSV Spec 0.9.0-draft.1 +version: 0.9.0-draft.1 +--- +CSV Spec 0.9.0-draft.1 +==================== + +Summary +------- + +CSV is not a file format, it is a loose set of guidelines of how to structure +tabular data into a plain text string. As such there's an endless amount of +`*.csv` files floating around which are highly incompatible with each other. The +closest thing there is to a specification is [RFC +4180](http://tools.ietf.org/html/rfc4180). + +Goals +----- + +This project is an attempt to summarize RFC 4180 and the information in the +[Comma-separated values +(CSV)](http://en.wikipedia.org/wiki/Comma-separated_values) Wikipedia article +into a easy to understand format. The spec will also take into account that the +comma (`,`) character is not the only character used as a field +delimiter. Semi-colons (`;`), tabs (`\t`), and more are popular field delimiter +characters. 
As such the specification will more accurately be describing a +CSV-like structured data format. + +We will also provide input/output test files that CSV parser/writer software +libraries can use to validate if they properly adhere to the rules laid out in +this specification. And if possible we will even try to provide code snippets in +various languages that attempts to automatically determine the delimiter +character used in any given input CSV-like formatted file/data. + +Roadmap +------- + +1. Write up core specification rules. _[in-progress]_ +2. Create input/output test files covering all rules in the specification. +3. Create website for [csv-spec.org](http://csv-spec.org/). +4. Create linting tool as a NPM module, allowing easy validation of CSV data + both client-side in a web browser, and server side via a command line tool. +5. Create automatic delimiter character detection code snippets in various + programming languages which CSV parser developers can freely use to enhance + their libraries. + +Terminology +----------- + +- **Field** — A singular String value within a record. +- **Record** (or **Row**) — A collection of fields. This is often referred to as + a "line", but a single record can span multiple text lines if a field within + it contains one or more line breaks. +- **Delimiter** — The character used to separate fields withing a row. Commonly + this will be a comma (`,`), but semi-colons (`;`) or tabs (`\t`) are two other + popular delimiter characters. +- **Header** — The first row is often used to contain the column names for all + remaining rows. Header names would be used as key names when CSV data is + converted to JSON for example. +- **Line Break** — Line breaks in CSV files can be CRLF (`\r\n`), LF (`\n`), and + even in rare cases CR (`\r`). +- **LF, CR, and CRLF** — Different types of line breaks, typically determined by + the OS. Linux, OSX, and other *NIX operating systems generally use a line feed + (LF or `\n`) character. 
Windows uses a carriage return (CR or `\r`) and a line + feed character, effectively "CRLF" (`\r\n`). + +CSV Format Specification +------------------------ + +The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", +"SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be +interpreted as described in [RFC 2119](https://tools.ietf.org/html/rfc2119). + +These rules are mostly based on the corresponding section from [RFC +4180](http://tools.ietf.org/html/rfc4180#section-2), with minor changes, +clarifications and improved examples. + +1. Each record starts at the beginning of its own line, and ends with a line + break (shown as `¬`). + + CSV: + + ```csv + aaa,bbb,ccc¬ + xxx,yyy,zzz¬ + ``` + + JSON: + + ```json + [ ["aaa", "bbb", "ccc"], + ["xxx", "yyy", "zzz"] ] + ``` + +2. Though it is RECOMMENDED, the last record in a file is not required to have a + ending line break. + + CSV: + + ```csv + aaa,bbb,ccc¬ + xxx,yyy,zzz + ``` + + JSON: + + ```json + [ ["aaa", "bbb", "ccc"], + ["xxx", "yyy", "zzz"] ] + ``` + +3. There may be an OPTIONAL header line appearing as the first line of the file + with the same format as normal records. This header will contain names + corresponding to the fields in the file, and MUST contain the same number of + fields as the records in the rest of the file. + + CSV: + + ```csv + field_1,field_2,field_3¬ + aaa,bbb,ccc¬ + xxx,yyy,zzz¬ + ``` + + JSON (ignoring headers): + + ```json + [ ["field_1", "field_2", "field_3"], + ["aaa", "bbb", "ccc"], + ["xxx", "yyy", "zzz"] ] + ``` + + JSON (using headers): + + ```json + [ {"field_1": "aaa", "field_2": "bbb", "field_3": "ccc"}, + {"field_1": "xxx", "field_2": "yyy", "field_3": "zzz"} ] + ``` + +4. Within each record and the OPTIONAL header, there may be one or more fields, + separated by a delimiter (normally a comma). Each record MUST contain the + same number of fields throughout the file. 
+ + CSV (invalid): + + ```csv + aaa,bbb,ccc¬ + 111,222,333,444¬ + xxx,yyy,zzz¬ + ``` + +5. The last field in a record MUST NOT be followed by a comma. This results in a + additional field with nothing in it. + + CSV: + + ```csv + aaa,bbb,ccc,¬ + xxx,yyy,zzz,¬ + ``` + + JSON: + + ```json + [ ["aaa", "bbb", "ccc", ""], + ["xxx", "yyy", "zzz", ""] ] + ``` + +6. Spaces are considered part of a field and MUST NOT be ignored. + + CSV: + + ```csv + aaa , bbb , ccc¬ + xxx, yyy ,zzz ¬ + ``` + + JSON: + + ```json + [ ["aaa ", " bbb ", " ccc"], + [" xxx", " yyy ", "zzz "] ] + ``` + +7. Fields containing line breaks (CRLF, LF, or CR), double quotes, or the + delimiter character (normally a comma) MUST be enclosed in double-quotes. + + CSV: + + ```csv + aaa,"b¬ + bb",ccc¬ + xxx,"y, yy",zzz¬ + ``` + + JSON: + + ```json + [ ["aaa", "b\r\nbb", "ccc"], + ["xxx", "y, yy", "zzz"] ] + ``` + +8. A double-quote appearing inside a field MUST be escaped by preceding it with + another double quote, and the field itself MUST be enclosed in double quotes. + + CSV: + + ```csv + aaa,"b""bb",ccc¬ + ``` + + JSON: + + ```json + [ ["aaa", "b\"bb", "ccc"] ] + ``` + +9. When a field enclosed in double quotes has spaces before and/or after the + double quotes, the spaces MUST be ignored, as the field starts and ends with + the double quotes. However this is considered invalid formatting and the CSV + parser SHOULD report some form of warning message. + + CSV: + + ```csv + aaa,bbb,ccc¬ + xxx, "y, yy" ,zzz¬ + ``` + + JSON: + + ```json + [ ["aaa", "bbb", "ccc"], + ["xxx", "y, yy", "zzz"] ] + ``` + +10. It is possible to enclose every field in double quotes even if they don't + need to be enclosed. However it is RECOMMENDED to only enclose fields in + double quotes that requires it. + + CSV: + + ```csv + "aaa","bbb","ccc"¬ + "xxx",yyy,zzz¬ + ``` + + JSON: + + ```json + [ ["aaa", "bbb", "ccc"], + ["xxx", "yyy", "zzz"] ] + ``` + +11. All fields are always strings. 
CSV itself does not support type casting to + integers, floats, booleans, or anything else. It is not a CSV library's + responsibility to type cast input CSV data. + + If type casting is required, it is up to the developer using a specific CSV + library to ensure types are correctly dealt with. + + Input JSON: + + ```json + [ [10, true, 0.3, null, "aaa"], + [11, false, 2.13, "", "bbb"] ] + ``` + + Output CSV: + + ```csv + 10,true,0.3,,aaa¬ + 11,false,2.13,,bbb¬ + ``` + + Output CSV parsed back to JSON: + + ```json + [ ["10", "true", "0.3", "", "aaa"], + ["11", "false", "2.13", "", "bbb"] ] + ``` + + At this point it is up to the developer themselves to type cast the above + output data from the CSV parser. + +12. However, when rendering type cast input data to CSV text, non-string types + MUST be converted to a string in such a way that minimal information is + lost. + - Integers and floats MUST be rendered as a string version of themselves. + - Booleans `true` and `false` MUST be rendered as `true` and `false` + strings, not as `1` or `0` numbers. If numbers are used the resulting + CSV data is indistinguishable from actual integer numbers. + - `Null`/`nil` values MUST be rendered as empty strings. + +13. When parsing input CSV data all forms of line breaks (CRLF, LF, and CR) MUST + be supported. +14. When rendering output CSV data, CRLF MUST be used for line breaks to ensure + maximum cross-platform compatibility. + +About +----- + +This CSV specification is authored by [Jim Myhrberg](https://jimeh.me/). + +If you'd like to leave feedback, +please [open an issue on GitHub](https://github.com/parsecsv/csv-spec/issues). + +License +------- + +[CC0 1.0 Universal](http://creativecommons.org/publicdomain/zero/1.0/) +