commit fd1e7465b873f043750de72b56fac9e6546a3b10 Author: Jim Myhrberg Date: Sun Oct 8 15:27:35 2017 +0100 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..404d8b1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +_site +.asset-cache +.sass-cache +.jekyll-metadata +docs/assets/.sprockets-manifest-*.json diff --git a/404.html b/404.html new file mode 100644 index 0000000..e6b61db --- /dev/null +++ b/404.html @@ -0,0 +1,9 @@ +--- +title: 404 Page Not Found +sitemap: false +--- +
+

404

+

Page not found :(

+

The requested page could not be found.

+
diff --git a/CNAME b/CNAME new file mode 100644 index 0000000..71f60da --- /dev/null +++ b/CNAME @@ -0,0 +1 @@ +csv-spec.org diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..fefd6a7 --- /dev/null +++ b/Gemfile @@ -0,0 +1,21 @@ +source 'https://rubygems.org' + +gem 'jekyll', '3.5.0' + +group :development do + gem 'rake' + gem 'rubocop' +end + +# If you have any plugins, put them here! +group :jekyll_plugins do + gem 'jekyll-assets' + gem 'jekyll-pants' + gem 'jekyll-seo-tag' + gem 'jekyll-sitemap' + gem 'jekyll-tidy' + gem 'uglifier' # required by 'jekyll-assets' for JS compression +end + +# Windows does not include zoneinfo files, so bundle the tzinfo-data gem +gem 'tzinfo-data', platforms: [:mingw, :mswin, :x64_mingw, :jruby] diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 0000000..91ecdd9 --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,102 @@ +GEM + remote: https://rubygems.org/ + specs: + addressable (2.5.1) + public_suffix (~> 2.0, >= 2.0.2) + ast (2.3.0) + colorator (1.1.0) + concurrent-ruby (1.0.5) + execjs (2.7.0) + extras (0.3.0) + forwardable-extended (~> 2.5) + fastimage (2.1.0) + ffi (1.9.18) + forwardable-extended (2.6.0) + htmlbeautifier (1.3.1) + htmlcompressor (0.3.1) + jekyll (3.5.0) + addressable (~> 2.4) + colorator (~> 1.0) + jekyll-sass-converter (~> 1.0) + jekyll-watch (~> 1.1) + kramdown (~> 1.3) + liquid (~> 4.0) + mercenary (~> 0.3.3) + pathutil (~> 0.9) + rouge (~> 1.7) + safe_yaml (~> 1.0) + jekyll-assets (2.3.2) + concurrent-ruby (~> 1.0) + extras (~> 0.2) + fastimage (~> 2.0, >= 1.8) + jekyll (~> 3.1, >= 3.0) + pathutil (>= 0.8) + rack (~> 1.6) + sprockets (~> 3.3, < 3.8) + jekyll-pants (0.2.1) + rubypants + jekyll-sass-converter (1.5.0) + sass (~> 3.4) + jekyll-seo-tag (2.2.3) + jekyll (~> 3.3) + jekyll-sitemap (1.0.0) + jekyll (~> 3.3) + jekyll-tidy (0.2.2) + htmlbeautifier + htmlcompressor + jekyll + jekyll-watch (1.5.0) + listen (~> 3.0, < 3.1) + kramdown (1.14.0) + liquid (4.0.0) + listen (3.0.8) + 
rb-fsevent (~> 0.9, >= 0.9.4) + rb-inotify (~> 0.9, >= 0.9.7) + mercenary (0.3.6) + parser (2.4.0.0) + ast (~> 2.2) + pathutil (0.14.0) + forwardable-extended (~> 2.6) + powerpack (0.1.1) + public_suffix (2.0.5) + rack (1.6.8) + rainbow (2.2.1) + rake (12.0.0) + rb-fsevent (0.10.2) + rb-inotify (0.9.10) + ffi (>= 0.5.0, < 2) + rouge (1.11.1) + rubocop (0.47.1) + parser (>= 2.3.3.1, < 3.0) + powerpack (~> 0.1) + rainbow (>= 1.99.1, < 3.0) + ruby-progressbar (~> 1.7) + unicode-display_width (~> 1.0, >= 1.0.1) + ruby-progressbar (1.8.1) + rubypants (0.6.0) + safe_yaml (1.0.4) + sass (3.4.25) + sprockets (3.7.1) + concurrent-ruby (~> 1.0) + rack (> 1, < 3) + uglifier (3.2.0) + execjs (>= 0.3.0, < 3) + unicode-display_width (1.3.0) + +PLATFORMS + ruby + +DEPENDENCIES + jekyll (= 3.5.0) + jekyll-assets + jekyll-pants + jekyll-seo-tag + jekyll-sitemap + jekyll-tidy + rake + rubocop + tzinfo-data + uglifier + +BUNDLED WITH + 1.14.6 diff --git a/README.md b/README.md new file mode 100644 index 0000000..54685fc --- /dev/null +++ b/README.md @@ -0,0 +1,5 @@ +# csv-spec.org + +This is the site at http://csv-spec.org/ that attempts to describe CSV-like +formats in a obvious and easy to understand way, complete with code examples +aimed at developers. 
diff --git a/Rakefile b/Rakefile new file mode 100644 index 0000000..49efba5 --- /dev/null +++ b/Rakefile @@ -0,0 +1,85 @@ +require 'open-uri' +require 'yaml' + +desc 'Build site into docs directory' +task :build do + jekyll_build +end + +desc 'Update index.md and spec folder based on versions in _config.yml' +task :update do + config = YAML.load_file('_config.yml') + current_version = config['current_version'] + versions = config['versions'] + + remove_all_specs(config['update']) + + puts '' + puts 'Fetching configured spec versions:' + versions.each do |version| + spec = fetch_spec(version, config['update']) + + if current_version == version + write_file('index.md', spec[:body], " (#{version})") + end + + filename = File.join(config['update']['output_dir'], version) + write_file("#{filename}.md", spec[:body]) + write_file("#{filename}.svg", spec[:diagram]) if spec[:diagram] + end + + jekyll_build +end + +def jekyll_build + puts 'Rebuilding output into docs directory...' + exec 'jekyll build --destination docs && touch docs/.nojekyll' +end + +def write_file(file, content, comment = nil) + puts " - #{file}#{comment}" + File.write(file, content) +end + +def fetch_spec(version, config) + document = get(build_file_url('document', version, config)) + + if config['files']['diagram'] + diagram = get(build_file_url('diagram', version, config)) + img_tag = config['img_tpl'].gsub('{{file}}', "#{version}.svg") + document.gsub!(/\A(.*\n=+\n)/, "\\1\n#{img_tag}\n") + end + + title = document.split("\n", 2).first + body = config['body_tpl'].gsub('{{content}}', document) + .gsub('{{title}}', title) + .gsub('{{version}}', version) + + { + version: version, + title: title, + body: body, + diagram: diagram + } +end + +def build_file_url(file, version, config) + config['url_tpl'] + .gsub('{{version}}', version) + .gsub('{{file}}', config['files'][file]) +end + +def get(url) + URI.parse(url).read +rescue OpenURI::HTTPError + nil +end + +def remove_all_specs(config) + puts '' + puts 
'Removing existing spec files:' + Dir["#{config['output_dir']}/*"].each do |file| + puts " #{file.gsub(File.dirname(__FILE__), '')}" + File.delete(file) + end +end diff --git a/_assets/css/_base.scss b/_assets/css/_base.scss new file mode 100644 index 0000000..5cb3783 --- /dev/null +++ b/_assets/css/_base.scss @@ -0,0 +1,90 @@ +html { + height: 100%; +} + +body { + font-family: 'Open Sans', Helvetica, Arial, sans-serif; + font-size: 16px; + font-weight: 400; + line-height: 1.5; + color: #1a1a1a; + background-color: #fdfdfd; +} + +h1, h2, h3, h4, h5, h6 { + font-family: 'Open Sans Condensed', Helvetica, Arial, sans-serif; + font-weight: 700; + color: #333; +} + +h1 { + font-size: 2.5em; + line-height: 1.2; +} + +ol ol, ul ol { + list-style-type: lower-roman; +} + +ul ul ol, ul ol ol, ol ul ol, ol ol ol { + list-style-type: lower-alpha; +} + +.content { + margin-top: 80px; + + a { + word-break: break-word; + } + + code { + background-color: rgba(27,31,35,0.05); + border-radius: 3px; + font-family: "SFMono-Regular", Consolas, "Liberation Mono", Menlo, Courier, monospace; + font-size: 85%; + margin: 0; + padding: 0.3em 0.4em 0.1em 0.4em; + } + + pre { + background-color: #f6f8fa; + border-radius: 3px; + line-height: 1.45; + padding: 16px; + } + + pre > code { + background-color: transparent !important; + border-radius: none; + font-size: 90%; + padding: 0; + } +} + +#menu { + .pure-menu-label { + color: #999; + border: none; + padding: 0.6em 0 0.6em 0.6em; + } + + .links { + font-size: 50px; + position: absolute; + bottom: 10px; + left: 0px; + right: 0px; + text-align: center; + + a { + color: #555; + padding: 0; + position: relative; + text-decoration: none; + + &:hover { + color: #777; + } + } + } +} diff --git a/_assets/css/_highlight.scss b/_assets/css/_highlight.scss new file mode 100644 index 0000000..2192921 --- /dev/null +++ b/_assets/css/_highlight.scss @@ -0,0 +1,62 @@ +/* 
https://github.com/jwarby/jekyll-pygments-themes/blob/22dfd74f9f10c87c0ec98876e136f02c989d43ba/github.css */ +.highlight .hll { background-color: #ffffcc } +.highlight .c { color: #999988; font-style: italic } /* Comment */ +.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */ +.highlight .k { color: #000000; font-weight: bold } /* Keyword */ +.highlight .o { color: #000000; font-weight: bold } /* Operator */ +.highlight .cm { color: #999988; font-style: italic } /* Comment.Multiline */ +.highlight .cp { color: #999999; font-weight: bold; font-style: italic } /* Comment.Preproc */ +.highlight .c1 { color: #999988; font-style: italic } /* Comment.Single */ +.highlight .cs { color: #999999; font-weight: bold; font-style: italic } /* Comment.Special */ +.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */ +.highlight .ge { color: #000000; font-style: italic } /* Generic.Emph */ +.highlight .gr { color: #aa0000 } /* Generic.Error */ +.highlight .gh { color: #999999 } /* Generic.Heading */ +.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */ +.highlight .go { color: #888888 } /* Generic.Output */ +.highlight .gp { color: #555555 } /* Generic.Prompt */ +.highlight .gs { font-weight: bold } /* Generic.Strong */ +.highlight .gu { color: #aaaaaa } /* Generic.Subheading */ +.highlight .gt { color: #aa0000 } /* Generic.Traceback */ +.highlight .kc { color: #000000; font-weight: bold } /* Keyword.Constant */ +.highlight .kd { color: #000000; font-weight: bold } /* Keyword.Declaration */ +.highlight .kn { color: #000000; font-weight: bold } /* Keyword.Namespace */ +.highlight .kp { color: #000000; font-weight: bold } /* Keyword.Pseudo */ +.highlight .kr { color: #000000; font-weight: bold } /* Keyword.Reserved */ +.highlight .kt { color: #445588; font-weight: bold } /* Keyword.Type */ +.highlight .m { color: #009999 } /* Literal.Number */ +.highlight .s { color: #d01040 } /* Literal.String */ 
+.highlight .na { color: #008080 } /* Name.Attribute */ +.highlight .nb { color: #0086B3 } /* Name.Builtin */ +.highlight .nc { color: #445588; font-weight: bold } /* Name.Class */ +.highlight .no { color: #008080 } /* Name.Constant */ +.highlight .nd { color: #3c5d5d; font-weight: bold } /* Name.Decorator */ +.highlight .ni { color: #800080 } /* Name.Entity */ +.highlight .ne { color: #990000; font-weight: bold } /* Name.Exception */ +.highlight .nf { color: #990000; font-weight: bold } /* Name.Function */ +.highlight .nl { color: #990000; font-weight: bold } /* Name.Label */ +.highlight .nn { color: #555555 } /* Name.Namespace */ +.highlight .nt { color: #000080 } /* Name.Tag */ +.highlight .nv { color: #008080 } /* Name.Variable */ +.highlight .ow { color: #000000; font-weight: bold } /* Operator.Word */ +.highlight .w { color: #bbbbbb } /* Text.Whitespace */ +.highlight .mf { color: #009999 } /* Literal.Number.Float */ +.highlight .mh { color: #009999 } /* Literal.Number.Hex */ +.highlight .mi { color: #009999 } /* Literal.Number.Integer */ +.highlight .mo { color: #009999 } /* Literal.Number.Oct */ +.highlight .sb { color: #d01040 } /* Literal.String.Backtick */ +.highlight .sc { color: #d01040 } /* Literal.String.Char */ +.highlight .sd { color: #d01040 } /* Literal.String.Doc */ +.highlight .s2 { color: #d01040 } /* Literal.String.Double */ +.highlight .se { color: #d01040 } /* Literal.String.Escape */ +.highlight .sh { color: #d01040 } /* Literal.String.Heredoc */ +.highlight .si { color: #d01040 } /* Literal.String.Interpol */ +.highlight .sx { color: #d01040 } /* Literal.String.Other */ +.highlight .sr { color: #009926 } /* Literal.String.Regex */ +.highlight .s1 { color: #d01040 } /* Literal.String.Single */ +.highlight .ss { color: #990073 } /* Literal.String.Symbol */ +.highlight .bp { color: #999999 } /* Name.Builtin.Pseudo */ +.highlight .vc { color: #008080 } /* Name.Variable.Class */ +.highlight .vg { color: #008080 } /* Name.Variable.Global */ 
+.highlight .vi { color: #008080 } /* Name.Variable.Instance */ +.highlight .il { color: #009999 } /* Literal.Number.Integer.Long */ diff --git a/_assets/css/_side-menu.scss b/_assets/css/_side-menu.scss new file mode 100644 index 0000000..7abd61c --- /dev/null +++ b/_assets/css/_side-menu.scss @@ -0,0 +1,248 @@ +body { + color: #777; +} + +.pure-img-responsive { + max-width: 100%; + height: auto; +} + +/* +Add transition to containers so they can push in and out. +*/ +#layout, +#menu, +.menu-link { + -webkit-transition: all 0.2s ease-out; + -moz-transition: all 0.2s ease-out; + -ms-transition: all 0.2s ease-out; + -o-transition: all 0.2s ease-out; + transition: all 0.2s ease-out; +} + +/* +This is the parent `
` that contains the menu and the content area. +*/ +#layout { + position: relative; + left: 0; + padding-left: 0; +} + #layout.active #menu { + left: 150px; + width: 150px; + } + + #layout.active .menu-link { + left: 150px; + } +/* +The content `
` is where all your content goes. +*/ +.content { + margin: 0 auto; + padding: 0 2em; + max-width: 800px; + margin-bottom: 50px; + line-height: 1.6em; +} + +.header { + margin: 0; + color: #333; + text-align: center; + padding: 2.5em 2em 0; + border-bottom: 1px solid #eee; + } + .header h1 { + margin: 0.2em 0; + font-size: 3em; + font-weight: 300; + } + .header h2 { + font-weight: 300; + color: #ccc; + padding: 0; + margin-top: 0; + } + +.content-subhead { + margin: 50px 0 20px 0; + font-weight: 300; + color: #888; +} + + + +/* +The `#menu` `
` is the parent `
` that contains the `.pure-menu` that +appears on the left side of the page. +*/ + +#menu { + margin-left: -150px; /* "#menu" width */ + width: 150px; + position: fixed; + top: 0; + left: 0; + bottom: 0; + z-index: 1000; /* so the menu or its navicon stays above all content */ + background: #191818; + overflow-y: auto; + -webkit-overflow-scrolling: touch; +} + /* + All anchors inside the menu should be styled like this. + */ + #menu a { + color: #999; + border: none; + padding: 0.6em 0 0.6em 0.6em; + } + + /* + Remove all background/borders, since we are applying them to #menu. + */ + #menu .pure-menu, + #menu .pure-menu ul { + border: none; + background: transparent; + } + + /* + Add that light border to separate items into groups. + */ + #menu .pure-menu ul, + #menu .pure-menu .menu-item-divided { + border-top: 1px solid #333; + } + /* + Change color of the anchor links on hover/focus. + */ + #menu .pure-menu li a:hover, + #menu .pure-menu li a:focus { + background: #333; + } + + /* + This styles the selected menu item `
  • `. + */ + #menu .pure-menu-selected, + #menu .pure-menu-heading { + background: #1f8dd6; + } + /* + This styles a link within a selected menu item `
  • `. + */ + #menu .pure-menu-selected a { + color: #fff; + } + + /* + This styles the menu heading. + */ + #menu .pure-menu-heading { + font-size: 110%; + color: #fff; + margin: 0; + } + +/* -- Dynamic Button For Responsive Menu -------------------------------------*/ + +/* +The button to open/close the Menu is custom-made and not part of Pure. Here's +how it works: +*/ + +/* +`.menu-link` represents the responsive menu toggle that shows/hides on +small screens. +*/ +.menu-link { + position: fixed; + display: block; /* show this only on small screens */ + top: 0; + left: 0; /* "#menu width" */ + background: #000; + background: rgba(0,0,0,0.7); + font-size: 10px; /* change this value to increase/decrease button size */ + z-index: 10; + width: 2em; + height: auto; + padding: 2.1em 1.6em; +} + + .menu-link:hover, + .menu-link:focus { + background: #000; + } + + .menu-link span { + position: relative; + display: block; + } + + .menu-link span, + .menu-link span:before, + .menu-link span:after { + background-color: #fff; + width: 100%; + height: 0.2em; + } + + .menu-link span:before, + .menu-link span:after { + position: absolute; + margin-top: -0.6em; + content: " "; + } + + .menu-link span:after { + margin-top: 0.6em; + } + + +/* -- Responsive Styles (Media Queries) ------------------------------------- */ + +/* +Hides the menu at `48em`, but modify this based on your app's needs. +*/ +@media (min-width: 48em) { + + .header, + .content { + padding-left: 2em; + padding-right: 2em; + } + + #layout { + padding-left: 150px; /* left col width "#menu" */ + left: 0; + } + #menu { + left: 150px; + } + + .menu-link { + position: fixed; + left: 150px; + display: none; + } + + #layout.active .menu-link { + left: 150px; + } +} + +@media (max-width: 48em) { + /* Only apply this when the window is small. Otherwise, the following + case results in extra padding on the left: + * Make the window small. + * Tap the menu to trigger the active state. + * Make the window large again. 
+ */ + #layout.active { + position: relative; + left: 150px; + } +} diff --git a/_assets/css/main.scss b/_assets/css/main.scss new file mode 100644 index 0000000..6108420 --- /dev/null +++ b/_assets/css/main.scss @@ -0,0 +1,3 @@ +@import "side-menu"; +@import "highlight"; +@import "base"; diff --git a/_assets/js/main.js b/_assets/js/main.js new file mode 100644 index 0000000..afe5bb6 --- /dev/null +++ b/_assets/js/main.js @@ -0,0 +1 @@ +// = require ui diff --git a/_assets/js/ui.js b/_assets/js/ui.js new file mode 100644 index 0000000..4308341 --- /dev/null +++ b/_assets/js/ui.js @@ -0,0 +1,44 @@ +(function (window, document) { + var layout = document.getElementById('layout'); + var menu = document.getElementById('menu'); + var menuLink = document.getElementById('menuLink'); + var content = document.getElementById('main'); + + function toggleClass (element, className) { + var classes = element.className.split(/\s+/); + var length = classes.length; + var i = 0; + + for (; i < length; i++) { + if (classes[i] === className) { + classes.splice(i, 1); + break; + } + } + // The className is not found + if (length === classes.length) { + classes.push(className); + } + + element.className = classes.join(' '); + } + + function toggleAll (e) { + var active = 'active'; + + e.preventDefault(); + toggleClass(layout, active); + toggleClass(menu, active); + toggleClass(menuLink, active); + } + + menuLink.onclick = function (e) { + toggleAll(e); + }; + + content.onclick = function (e) { + if (menu.className.indexOf('active') !== -1) { + toggleAll(e); + } + }; +}(this, this.document)); diff --git a/_config.yml b/_config.yml new file mode 100644 index 0000000..8318ab0 --- /dev/null +++ b/_config.yml @@ -0,0 +1,50 @@ +title: CSV Spec +description: > + An attempt to describe CSV-like formats in a obvious and easy to understand + way, complete with code examples aimed at developers. 
+author: Jim Myhrberg +hostname: csv-spec.org +url: https://csv-spec.org +repo_url: https://github.com/parsecsv/csv-spec + +current_version: 0.9.0-draft.1 +versions: + - 0.9.0-draft.1 + +exclude: + - Gemfile + - Gemfile.lock + - Rakefile + - README.md + +update: + body_tpl: | + --- + title: {{title}} + version: {{version}} + --- + {{content}} + url_tpl: "https://github.com/parsecsv/csv-spec/raw/{{version}}/{{file}}" + output_dir: "spec" + files: + document: csv-spec.md + +plugins: + - jekyll-assets + - jekyll-pants + - jekyll-sitemap + - jekyll-seo-tag + - jekyll-tidy + +defaults: + - + scope: + path: "" + values: + layout: "default" + +assets: + digest: true + compress: + css: true + js: true diff --git a/_layouts/default.html b/_layouts/default.html new file mode 100644 index 0000000..9e047d6 --- /dev/null +++ b/_layouts/default.html @@ -0,0 +1,54 @@ + + + + + + + + + + {% css main %} + {% seo %} + + +
    + + + + + +
    +
    + {{ content }} +
    +
    +
    + {% js main %} + + diff --git a/docs/.nojekyll b/docs/.nojekyll new file mode 100644 index 0000000..e69de29 diff --git a/docs/404.html b/docs/404.html new file mode 100644 index 0000000..ddccd81 --- /dev/null +++ b/docs/404.html @@ -0,0 +1,60 @@ + + + + + + + + + + + + 404 Page Not Found | CSV Spec + + + + + + + + + + + + +
    + + + + +
    +
    +
    +

    404

    +

    Page not found :(

    +

    The requested page could not be found.

    +
    +
    +
    +
    + + + \ No newline at end of file diff --git a/docs/CNAME b/docs/CNAME new file mode 100644 index 0000000..71f60da --- /dev/null +++ b/docs/CNAME @@ -0,0 +1 @@ +csv-spec.org diff --git a/docs/assets/main-5df19fc13b2a391dcab974f0584820f95eecdd416e60b99950f2058c61a2e99f.css b/docs/assets/main-5df19fc13b2a391dcab974f0584820f95eecdd416e60b99950f2058c61a2e99f.css new file mode 100644 index 0000000..05248ba --- /dev/null +++ b/docs/assets/main-5df19fc13b2a391dcab974f0584820f95eecdd416e60b99950f2058c61a2e99f.css @@ -0,0 +1 @@ +body{color:#777}.pure-img-responsive{max-width:100%;height:auto}#layout,#menu,.menu-link{-webkit-transition:all 0.2s ease-out;-moz-transition:all 0.2s ease-out;-ms-transition:all 0.2s ease-out;-o-transition:all 0.2s ease-out;transition:all 0.2s ease-out}#layout{position:relative;left:0;padding-left:0}#layout.active #menu{left:150px;width:150px}#layout.active .menu-link{left:150px}.content{margin:0 auto;padding:0 2em;max-width:800px;margin-bottom:50px;line-height:1.6em}.header{margin:0;color:#333;text-align:center;padding:2.5em 2em 0;border-bottom:1px solid #eee}.header h1{margin:0.2em 0;font-size:3em;font-weight:300}.header h2{font-weight:300;color:#ccc;padding:0;margin-top:0}.content-subhead{margin:50px 0 20px 0;font-weight:300;color:#888}#menu{margin-left:-150px;width:150px;position:fixed;top:0;left:0;bottom:0;z-index:1000;background:#191818;overflow-y:auto;-webkit-overflow-scrolling:touch}#menu a{color:#999;border:none;padding:0.6em 0 0.6em 0.6em}#menu .pure-menu,#menu .pure-menu ul{border:none;background:transparent}#menu .pure-menu ul,#menu .pure-menu .menu-item-divided{border-top:1px solid #333}#menu .pure-menu li a:hover,#menu .pure-menu li a:focus{background:#333}#menu .pure-menu-selected,#menu .pure-menu-heading{background:#1f8dd6}#menu .pure-menu-selected a{color:#fff}#menu 
.pure-menu-heading{font-size:110%;color:#fff;margin:0}.menu-link{position:fixed;display:block;top:0;left:0;background:#000;background:rgba(0,0,0,0.7);font-size:10px;z-index:10;width:2em;height:auto;padding:2.1em 1.6em}.menu-link:hover,.menu-link:focus{background:#000}.menu-link span{position:relative;display:block}.menu-link span,.menu-link span:before,.menu-link span:after{background-color:#fff;width:100%;height:0.2em}.menu-link span:before,.menu-link span:after{position:absolute;margin-top:-0.6em;content:" "}.menu-link span:after{margin-top:0.6em}@media (min-width: 48em){.header,.content{padding-left:2em;padding-right:2em}#layout{padding-left:150px;left:0}#menu{left:150px}.menu-link{position:fixed;left:150px;display:none}#layout.active .menu-link{left:150px}}@media (max-width: 48em){#layout.active{position:relative;left:150px}}.highlight .hll{background-color:#ffffcc}.highlight .c{color:#999988;font-style:italic}.highlight .err{color:#a61717;background-color:#e3d2d2}.highlight .k{color:#000000;font-weight:bold}.highlight .o{color:#000000;font-weight:bold}.highlight .cm{color:#999988;font-style:italic}.highlight .cp{color:#999999;font-weight:bold;font-style:italic}.highlight .c1{color:#999988;font-style:italic}.highlight .cs{color:#999999;font-weight:bold;font-style:italic}.highlight .gd{color:#000000;background-color:#ffdddd}.highlight .ge{color:#000000;font-style:italic}.highlight .gr{color:#aa0000}.highlight .gh{color:#999999}.highlight .gi{color:#000000;background-color:#ddffdd}.highlight .go{color:#888888}.highlight .gp{color:#555555}.highlight .gs{font-weight:bold}.highlight .gu{color:#aaaaaa}.highlight .gt{color:#aa0000}.highlight .kc{color:#000000;font-weight:bold}.highlight .kd{color:#000000;font-weight:bold}.highlight .kn{color:#000000;font-weight:bold}.highlight .kp{color:#000000;font-weight:bold}.highlight .kr{color:#000000;font-weight:bold}.highlight .kt{color:#445588;font-weight:bold}.highlight .m{color:#009999}.highlight .s{color:#d01040}.highlight 
.na{color:#008080}.highlight .nb{color:#0086B3}.highlight .nc{color:#445588;font-weight:bold}.highlight .no{color:#008080}.highlight .nd{color:#3c5d5d;font-weight:bold}.highlight .ni{color:#800080}.highlight .ne{color:#990000;font-weight:bold}.highlight .nf{color:#990000;font-weight:bold}.highlight .nl{color:#990000;font-weight:bold}.highlight .nn{color:#555555}.highlight .nt{color:#000080}.highlight .nv{color:#008080}.highlight .ow{color:#000000;font-weight:bold}.highlight .w{color:#bbbbbb}.highlight .mf{color:#009999}.highlight .mh{color:#009999}.highlight .mi{color:#009999}.highlight .mo{color:#009999}.highlight .sb{color:#d01040}.highlight .sc{color:#d01040}.highlight .sd{color:#d01040}.highlight .s2{color:#d01040}.highlight .se{color:#d01040}.highlight .sh{color:#d01040}.highlight .si{color:#d01040}.highlight .sx{color:#d01040}.highlight .sr{color:#009926}.highlight .s1{color:#d01040}.highlight .ss{color:#990073}.highlight .bp{color:#999999}.highlight .vc{color:#008080}.highlight .vg{color:#008080}.highlight .vi{color:#008080}.highlight .il{color:#009999}html{height:100%}body{font-family:'Open Sans', Helvetica, Arial, sans-serif;font-size:16px;font-weight:400;line-height:1.5;color:#1a1a1a;background-color:#fdfdfd}h1,h2,h3,h4,h5,h6{font-family:'Open Sans Condensed', Helvetica, Arial, sans-serif;font-weight:700;color:#333}h1{font-size:2.5em;line-height:1.2}ol ol,ul ol{list-style-type:lower-roman}ul ul ol,ul ol ol,ol ul ol,ol ol ol{list-style-type:lower-alpha}.content{margin-top:80px}.content a{word-break:break-word}.content code{background-color:rgba(27,31,35,0.05);border-radius:3px;font-family:"SFMono-Regular", Consolas, "Liberation Mono", Menlo, Courier, monospace;font-size:85%;margin:0;padding:0.3em 0.4em 0.1em 0.4em}.content pre{background-color:#f6f8fa;border-radius:3px;line-height:1.45;padding:16px}.content pre>code{background-color:transparent !important;border-radius:none;font-size:90%;padding:0}#menu .pure-menu-label{color:#999;border:none;padding:0.6em 
0 0.6em 0.6em}#menu .links{font-size:50px;position:absolute;bottom:10px;left:0px;right:0px;text-align:center}#menu .links a{color:#555;padding:0;position:relative;text-decoration:none}#menu .links a:hover{color:#777} diff --git a/docs/assets/main-870855580c69dec57be4c965d0cf8afe78afa6b7b6f6bdb5aff91ac0256c0a1a.js b/docs/assets/main-870855580c69dec57be4c965d0cf8afe78afa6b7b6f6bdb5aff91ac0256c0a1a.js new file mode 100644 index 0000000..fa63c7a --- /dev/null +++ b/docs/assets/main-870855580c69dec57be4c965d0cf8afe78afa6b7b6f6bdb5aff91ac0256c0a1a.js @@ -0,0 +1 @@ +!function(e,n){function t(e,n){for(var t=e.className.split(/\s+/),i=t.length,c=0;c + + + + + + + + + + + CSV Spec 0.9.0-draft.1 | CSV Spec + + + + + + + + + + + + +
    + + + + +
    +
    +

    CSV Spec 0.9.0-draft.1

    +

    Summary

    +

    CSV is not a file format; it is a loose set of guidelines for how to structure + tabular data into a plain text string. As such, there’s an endless number of + *.csv files floating around which are highly incompatible with each other. The + closest thing there is to a specification is RFC + 4180.

    +

    Goals

    +

    This project is an attempt to summarize RFC 4180 and the information in the + Comma-separated values + (CSV) Wikipedia article + into an easy to understand format. The spec will also take into account that the + comma (,) character is not the only character used as a field + delimiter. Semi-colons (;), tabs (\t), and more are popular field delimiter + characters. As such the specification will more accurately be describing a + CSV-like structured data format.

    +

    We will also provide input/output test files that CSV parser/writer software + libraries can use to validate if they properly adhere to the rules laid out in + this specification. And if possible we will even try to provide code snippets in + various languages that attempt to automatically determine the delimiter + character used in any given input CSV-like formatted file/data.

    +

    Roadmap

    +
      +
    1. Write up core specification rules. [in-progress]
    2. +
    3. Create input/output test files covering all rules in the specification.
    4. +
    5. Create website for csv-spec.org.
    6. +
    7. Create linting tool as an NPM module, allowing easy validation of CSV data + both client-side in a web browser, and server-side via a command line tool.
    8. +
    9. Create automatic delimiter character detection code snippets in various + programming languages which CSV parser developers can freely use to enhance + their libraries.
    10. +
    +

    Terminology

    +
      +
    • Field — A singular String value within a record.
    • +
    • Record (or Row) — A collection of fields. This is often referred to as + a “line”, but a single record can span multiple text lines if a field within + it contains one or more line breaks.
    • +
    • Delimiter — The character used to separate fields within a row. Commonly + this will be a comma (,), but semi-colons (;) or tabs (\t) are two other + popular delimiter characters.
    • +
    • Header — The first row is often used to contain the column names for all + remaining rows. Header names would be used as key names when CSV data is + converted to JSON for example.
    • +
    • Line Break — Line breaks in CSV files can be CRLF (\r\n), LF (\n), and + even in rare cases CR (\r).
    • +
    • LF, CR, and CRLF — Different types of line breaks, typically determined by + the OS. Linux, OSX, and other *NIX operating systems generally use a line feed + (LF or \n) character. Windows uses a carriage return (CR or \r) and a line + feed character, effectively “CRLF” (\r\n).
    • +
    +

    CSV Format Specification

    +

    The key words “MUST”, “MUST NOT”, “REQUIRED”, “SHALL”, “SHALL NOT”, “SHOULD”, + “SHOULD NOT”, “RECOMMENDED”, “MAY”, and “OPTIONAL” in this document are to be + interpreted as described in RFC 2119.

    +

    These rules are mostly based on the corresponding section from RFC + 4180, with minor changes, + clarifications and improved examples.

    +
      +
    1. +

      Each record starts at the beginning of its own line, and ends with a line + break (shown as ¬).

      +

      CSV:

      +
      aaa,bbb,ccc¬
      +xxx,yyy,zzz¬
      +
      +

      JSON:

      +
      +
      [ ["aaa", "bbb", "ccc"],
      +  ["xxx", "yyy", "zzz"] ]
      +
      +
      +
    2. +
    3. +

      Though it is RECOMMENDED, the last record in a file is not required to have an + ending line break.

      +

      CSV:

      +
      aaa,bbb,ccc¬
      +xxx,yyy,zzz
      +
      +

      JSON:

      +
      +
      [ ["aaa", "bbb", "ccc"],
      +  ["xxx", "yyy", "zzz"] ]
      +
      +
      +
    4. +
    5. +

      There may be an OPTIONAL header line appearing as the first line of the file + with the same format as normal records. This header will contain names + corresponding to the fields in the file, and MUST contain the same number of + fields as the records in the rest of the file.

      +

      CSV:

      +
      field_1,field_2,field_3¬
      +aaa,bbb,ccc¬
      +xxx,yyy,zzz¬
      +
      +

      JSON (ignoring headers):

      +
      +
      [ ["field_1", "field_2", "field_3"],
      +  ["aaa", "bbb", "ccc"],
      +  ["xxx", "yyy", "zzz"] ]
      +
      +
      +

      JSON (using headers):

      +
      +
      [ {"field_1": "aaa", "field_2": "bbb", "field_3": "ccc"},
      +  {"field_1": "xxx", "field_2": "yyy", "field_3": "zzz"} ]
      +
      +
      +
    6. +
    7. +

      Within each record and the OPTIONAL header, there may be one or more fields, + separated by a delimiter (normally a comma). Each record MUST contain the + same number of fields throughout the file.

      +

      CSV (invalid):

      +
      aaa,bbb,ccc¬
      +111,222,333,444¬
      +xxx,yyy,zzz¬
      +
      +
    8. +
    9. +

      The last field in a record MUST NOT be followed by a comma. This results in an + additional field with nothing in it.

      +

      CSV:

      +
      aaa,bbb,ccc,¬
      +xxx,yyy,zzz,¬
      +
      +

      JSON:

      +
      +
      [ ["aaa", "bbb", "ccc", ""],
      +  ["xxx", "yyy", "zzz", ""] ]
      +
      +
      +
    10. +
    11. +

      Spaces are considered part of a field and MUST NOT be ignored.

      +

      CSV:

      +
      aaa ,  bbb , ccc¬
      + xxx, yyy  ,zzz ¬
      +
      +

      JSON:

      +
      +
      [ ["aaa ", "  bbb ", " ccc"],
      +  [" xxx", " yyy  ", "zzz "] ]
      +
      +
      +
    12. +
    13. +

      Fields containing line breaks (CRLF, LF, or CR), double quotes, or the + delimiter character (normally a comma) MUST be enclosed in double-quotes.

      +

      CSV:

      +
      aaa,"b¬
      +bb",ccc¬
      +xxx,"y, yy",zzz¬
      +
      +

      JSON:

      +
      +
      [ ["aaa", "b\r\nbb", "ccc"],
      +  ["xxx", "y, yy", "zzz"] ]
      +
      +
      +
    14. +
    15. +

      A double-quote appearing inside a field MUST be escaped by preceding it with + another double quote, and the field itself MUST be enclosed in double quotes.

      +

      CSV:

      +
      aaa,"b""bb",ccc¬
      +
      +

      JSON:

      +
      +
      [ ["aaa", "b\"bb", "ccc"] ]
      +
      +
      +
    16. +
    17. +

      When a field enclosed in double quotes has spaces before and/or after the + double quotes, the spaces MUST be ignored, as the field starts and ends with + the double quotes. However this is considered invalid formatting and the CSV + parser SHOULD report some form of warning message.

      +

      CSV:

      +
      aaa,bbb,ccc¬
      +xxx,  "y, yy" ,zzz¬
      +
      +

      JSON:

      +
      +
      [ ["aaa", "bbb", "ccc"],
      +  ["xxx", "y, yy", "zzz"] ]
      +
      +
      +
    18. +
    19. +

      It is possible to enclose every field in double quotes even if they don’t + need to be enclosed. However, it is RECOMMENDED to only enclose fields in + double quotes that require it.

      +

      CSV:

      +
      "aaa","bbb","ccc"¬
      +"xxx",yyy,zzz¬
      +
      +

      JSON:

      +
      +
      [ ["aaa", "bbb", "ccc"],
      +  ["xxx", "yyy", "zzz"] ]
      +
      +
      +
    20. +
    21. +

      All fields are always strings. CSV itself does not support type casting to + integers, floats, booleans, or anything else. It is not a CSV library’s + responsibility to type cast input CSV data.

      +

      If type casting is required, it is up to the developer using a specific CSV + library to ensure types are correctly dealt with.

      +

      Input JSON:

      +
      +
      [ [10, true, 0.3, null, "aaa"],
      +  [11, false, 2.13, "", "bbb"] ]
      +
      +
      +

      Output CSV:

      +
      10,true,0.3,,aaa¬
      +11,false,2.13,,bbb¬
      +
      +

      Output CSV parsed back to JSON:

      +
      +
      [ ["10", "true", "0.3", "", "aaa"],
      +  ["11", "false", "2.13", "", "bbb"] ]
      +
      +
      +

      At this point it is up to the developer themselves to type cast the above + output data from the CSV parser.

      +
    22. +
    23. However, when rendering type cast input data to CSV text, non-string types + MUST be converted to a string in such a way that minimal information is + lost. +
        +
      • Integers and floats MUST be rendered as a string version of themselves.
      • +
      • Booleans true and false MUST be rendered as true and false + strings, not as 1 or 0 numbers. If numbers are used the resulting + CSV data is indistinguishable from actual integer numbers.
      • +
      • Null/nil values MUST be rendered as empty strings.
      • +
      +
    24. +
    25. When parsing input CSV data all forms of line breaks (CRLF, LF, and CR) MUST + be supported.
    26. +
    27. When rendering output CSV data, CRLF MUST be used for line breaks to ensure + maximum cross-platform compatibility.
    28. +
    +

    About

    +

    This CSV specification is authored by Jim Myhrberg.

    +

    If you’d like to leave feedback, + please open an issue on GitHub.

    +

    License

    +

    CC0 1.0 Universal

    +
    +
    +
    + + + \ No newline at end of file diff --git a/docs/robots.txt b/docs/robots.txt new file mode 100644 index 0000000..e4a67c4 --- /dev/null +++ b/docs/robots.txt @@ -0,0 +1 @@ +Sitemap: https://csv-spec.org/sitemap.xml \ No newline at end of file diff --git a/docs/sitemap.xml b/docs/sitemap.xml new file mode 100644 index 0000000..1141125 --- /dev/null +++ b/docs/sitemap.xml @@ -0,0 +1,9 @@ + + + + https://csv-spec.org/spec/0.9.0-draft.1.html + + + https://csv-spec.org/ + + \ No newline at end of file diff --git a/docs/spec/0.9.0-draft.1.html b/docs/spec/0.9.0-draft.1.html new file mode 100644 index 0000000..e854fe1 --- /dev/null +++ b/docs/spec/0.9.0-draft.1.html @@ -0,0 +1,308 @@ + + + + + + + + + + + + CSV Spec 0.9.0-draft.1 | CSV Spec + + + + + + + + + + + + +
    + + + + +
    +
    +

    CSV Spec 0.9.0-draft.1

    +

    Summary

    +

    CSV is not a file format, it is a loose set of guidelines of how to structure + tabular data into a plain text string. As such there’s an endless amount of + *.csv files floating around which are highly incompatible with each other. The + closest thing there is to a specification is RFC + 4180.

    +

    Goals

    +

    This project is an attempt to summarize RFC 4180 and the information in the + Comma-separated values + (CSV) Wikipedia article + into an easy to understand format. The spec will also take into account that the + comma (,) character is not the only character used as a field + delimiter. Semi-colons (;), tabs (\t), and more are popular field delimiter + characters. As such the specification will more accurately be describing a + CSV-like structured data format.

    +

    We will also provide input/output test files that CSV parser/writer software + libraries can use to validate if they properly adhere to the rules laid out in + this specification. And if possible we will even try to provide code snippets in + various languages that attempt to automatically determine the delimiter + character used in any given input CSV-like formatted file/data.

    +

    Roadmap

    +
      +
    1. Write up core specification rules. [in-progress]
    2. +
    3. Create input/output test files covering all rules in the specification.
    4. +
    5. Create website for csv-spec.org.
    6. +
    7. Create linting tool as an NPM module, allowing easy validation of CSV data + both client-side in a web browser, and server side via a command line tool.
    8. +
    9. Create automatic delimiter character detection code snippets in various + programming languages which CSV parser developers can freely use to enhance + their libraries.
    10. +
    +

    Terminology

    +
      +
    • Field — A singular String value within a record.
    • +
    • Record (or Row) — A collection of fields. This is often referred to as + a “line”, but a single record can span multiple text lines if a field within + it contains one or more line breaks.
    • +
    • Delimiter — The character used to separate fields within a row. Commonly + this will be a comma (,), but semi-colons (;) or tabs (\t) are two other + popular delimiter characters.
    • +
    • Header — The first row is often used to contain the column names for all + remaining rows. Header names would be used as key names when CSV data is + converted to JSON for example.
    • +
    • Line Break — Line breaks in CSV files can be CRLF (\r\n), LF (\n), and + even in rare cases CR (\r).
    • +
    • LF, CR, and CRLF — Different types of line breaks, typically determined by + the OS. Linux, OSX, and other *NIX operating systems generally use a line feed + (LF or \n) character. Windows uses a carriage return (CR or \r) and a line + feed character, effectively “CRLF” (\r\n).
    • +
    +

    CSV Format Specification

    +

    The key words “MUST”, “MUST NOT”, “REQUIRED”, “SHALL”, “SHALL NOT”, “SHOULD”, + “SHOULD NOT”, “RECOMMENDED”, “MAY”, and “OPTIONAL” in this document are to be + interpreted as described in RFC 2119.

    +

    These rules are mostly based on the corresponding section from RFC + 4180, with minor changes, + clarifications and improved examples.

    +
      +
    1. +

      Each record starts at the beginning of its own line, and ends with a line + break (shown as ¬).

      +

      CSV:

      +
      aaa,bbb,ccc¬
      +xxx,yyy,zzz¬
      +
      +

      JSON:

      +
      +
      [ ["aaa", "bbb", "ccc"],
      +  ["xxx", "yyy", "zzz"] ]
      +
      +
      +
    2. +
    3. +

      Though it is RECOMMENDED, the last record in a file is not required to have an + ending line break.

      +

      CSV:

      +
      aaa,bbb,ccc¬
      +xxx,yyy,zzz
      +
      +

      JSON:

      +
      +
      [ ["aaa", "bbb", "ccc"],
      +  ["xxx", "yyy", "zzz"] ]
      +
      +
      +
    4. +
    5. +

      There may be an OPTIONAL header line appearing as the first line of the file + with the same format as normal records. This header will contain names + corresponding to the fields in the file, and MUST contain the same number of + fields as the records in the rest of the file.

      +

      CSV:

      +
      field_1,field_2,field_3¬
      +aaa,bbb,ccc¬
      +xxx,yyy,zzz¬
      +
      +

      JSON (ignoring headers):

      +
      +
      [ ["field_1", "field_2", "field_3"],
      +  ["aaa", "bbb", "ccc"],
      +  ["xxx", "yyy", "zzz"] ]
      +
      +
      +

      JSON (using headers):

      +
      +
      [ {"field_1": "aaa", "field_2": "bbb", "field_3": "ccc"},
      +  {"field_1": "xxx", "field_2": "yyy", "field_3": "zzz"} ]
      +
      +
      +
    6. +
    7. +

      Within each record and the OPTIONAL header, there may be one or more fields, + separated by a delimiter (normally a comma). Each record MUST contain the + same number of fields throughout the file.

      +

      CSV (invalid):

      +
      aaa,bbb,ccc¬
      +111,222,333,444¬
      +xxx,yyy,zzz¬
      +
      +
    8. +
    9. +

      The last field in a record MUST NOT be followed by a comma. This results in an + additional field with nothing in it.

      +

      CSV:

      +
      aaa,bbb,ccc,¬
      +xxx,yyy,zzz,¬
      +
      +

      JSON:

      +
      +
      [ ["aaa", "bbb", "ccc", ""],
      +  ["xxx", "yyy", "zzz", ""] ]
      +
      +
      +
    10. +
    11. +

      Spaces are considered part of a field and MUST NOT be ignored.

      +

      CSV:

      +
      aaa ,  bbb , ccc¬
      + xxx, yyy  ,zzz ¬
      +
      +

      JSON:

      +
      +
      [ ["aaa ", "  bbb ", " ccc"],
      +  [" xxx", " yyy  ", "zzz "] ]
      +
      +
      +
    12. +
    13. +

      Fields containing line breaks (CRLF, LF, or CR), double quotes, or the + delimiter character (normally a comma) MUST be enclosed in double-quotes.

      +

      CSV:

      +
      aaa,"b¬
      +bb",ccc¬
      +xxx,"y, yy",zzz¬
      +
      +

      JSON:

      +
      +
      [ ["aaa", "b\r\nbb", "ccc"],
      +  ["xxx", "y, yy", "zzz"] ]
      +
      +
      +
    14. +
    15. +

      A double-quote appearing inside a field MUST be escaped by preceding it with + another double quote, and the field itself MUST be enclosed in double quotes.

      +

      CSV:

      +
      aaa,"b""bb",ccc¬
      +
      +

      JSON:

      +
      +
      [ ["aaa", "b\"bb", "ccc"] ]
      +
      +
      +
    16. +
    17. +

      When a field enclosed in double quotes has spaces before and/or after the + double quotes, the spaces MUST be ignored, as the field starts and ends with + the double quotes. However this is considered invalid formatting and the CSV + parser SHOULD report some form of warning message.

      +

      CSV:

      +
      aaa,bbb,ccc¬
      +xxx,  "y, yy" ,zzz¬
      +
      +

      JSON:

      +
      +
      [ ["aaa", "bbb", "ccc"],
      +  ["xxx", "y, yy", "zzz"] ]
      +
      +
      +
    18. +
    19. +

      It is possible to enclose every field in double quotes even if they don’t + need to be enclosed. However, it is RECOMMENDED to only enclose fields in + double quotes that require it.

      +

      CSV:

      +
      "aaa","bbb","ccc"¬
      +"xxx",yyy,zzz¬
      +
      +

      JSON:

      +
      +
      [ ["aaa", "bbb", "ccc"],
      +  ["xxx", "yyy", "zzz"] ]
      +
      +
      +
    20. +
    21. +

      All fields are always strings. CSV itself does not support type casting to + integers, floats, booleans, or anything else. It is not a CSV library’s + responsibility to type cast input CSV data.

      +

      If type casting is required, it is up to the developer using a specific CSV + library to ensure types are correctly dealt with.

      +

      Input JSON:

      +
      +
      [ [10, true, 0.3, null, "aaa"],
      +  [11, false, 2.13, "", "bbb"] ]
      +
      +
      +

      Output CSV:

      +
      10,true,0.3,,aaa¬
      +11,false,2.13,,bbb¬
      +
      +

      Output CSV parsed back to JSON:

      +
      +
      [ ["10", "true", "0.3", "", "aaa"],
      +  ["11", "false", "2.13", "", "bbb"] ]
      +
      +
      +

      At this point it is up to the developer themselves to type cast the above + output data from the CSV parser.

      +
    22. +
    23. However, when rendering type cast input data to CSV text, non-string types + MUST be converted to a string in such a way that minimal information is + lost. +
        +
      • Integers and floats MUST be rendered as a string version of themselves.
      • +
      • Booleans true and false MUST be rendered as true and false + strings, not as 1 or 0 numbers. If numbers are used the resulting + CSV data is indistinguishable from actual integer numbers.
      • +
      • Null/nil values MUST be rendered as empty strings.
      • +
      +
    24. +
    25. When parsing input CSV data all forms of line breaks (CRLF, LF, and CR) MUST + be supported.
    26. +
    27. When rendering output CSV data, CRLF MUST be used for line breaks to ensure + maximum cross-platform compatibility.
    28. +
    +

    About

    +

    This CSV specification is authored by Jim Myhrberg.

    +

    If you’d like to leave feedback, + please open an issue on GitHub.

    +

    License

    +

    CC0 1.0 Universal

    +
    +
    +
    + + + \ No newline at end of file diff --git a/index.md b/index.md new file mode 100644 index 0000000..e2a314d --- /dev/null +++ b/index.md @@ -0,0 +1,312 @@ +--- +title: CSV Spec 0.9.0-draft.1 +version: 0.9.0-draft.1 +--- +CSV Spec 0.9.0-draft.1 +==================== + +Summary +------- + +CSV is not a file format, it is a loose set of guidelines of how to structure +tabular data into a plain text string. As such there's an endless amount of +`*.csv` files floating around which are highly incompatible with each other. The +closest thing there is to a specification is [RFC +4180](http://tools.ietf.org/html/rfc4180). + +Goals +----- + +This project is an attempt to summarize RFC 4180 and the information in the +[Comma-separated values +(CSV)](http://en.wikipedia.org/wiki/Comma-separated_values) Wikipedia article +into a easy to understand format. The spec will also take into account that the +comma (`,`) character is not the only character used as a field +delimiter. Semi-colons (`;`), tabs (`\t`), and more are popular field delimiter +characters. As such the specification will more accurately be describing a +CSV-like structured data format. + +We will also provide input/output test files that CSV parser/writer software +libraries can use to validate if they properly adhere to the rules laid out in +this specification. And if possible we will even try to provide code snippets in +various languages that attempts to automatically determine the delimiter +character used in any given input CSV-like formatted file/data. + +Roadmap +------- + +1. Write up core specification rules. _[in-progress]_ +2. Create input/output test files covering all rules in the specification. +3. Create website for [csv-spec.org](http://csv-spec.org/). +4. Create linting tool as a NPM module, allowing easy validation of CSV data + both client-side in a web browser, and server side via a command line tool. +5. 
Create automatic delimiter character detection code snippets in various + programming languages which CSV parser developers can freely use to enhance + their libraries. + +Terminology +----------- + +- **Field** — A singular String value within a record. +- **Record** (or **Row**) — A collection of fields. This is often referred to as + a "line", but a single record can span multiple text lines if a field within + it contains one or more line breaks. +- **Delimiter** — The character used to separate fields withing a row. Commonly + this will be a comma (`,`), but semi-colons (`;`) or tabs (`\t`) are two other + popular delimiter characters. +- **Header** — The first row is often used to contain the column names for all + remaining rows. Header names would be used as key names when CSV data is + converted to JSON for example. +- **Line Break** — Line breaks in CSV files can be CRLF (`\r\n`), LF (`\n`), and + even in rare cases CR (`\r`). +- **LF, CR, and CRLF** — Different types of line breaks, typically determined by + the OS. Linux, OSX, and other *NIX operating systems generally use a line feed + (LF or `\n`) character. Windows uses a carriage return (CR or `\r`) and a line + feed character, effectively "CRLF" (`\r\n`). + +CSV Format Specification +------------------------ + +The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", +"SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be +interpreted as described in [RFC 2119](https://tools.ietf.org/html/rfc2119). + +These rules are mostly based on the corresponding section from [RFC +4180](http://tools.ietf.org/html/rfc4180#section-2), with minor changes, +clarifications and improved examples. + +1. Each record starts at the beginning of its own line, and ends with a line + break (shown as `¬`). + + CSV: + + ```csv + aaa,bbb,ccc¬ + xxx,yyy,zzz¬ + ``` + + JSON: + + ```json + [ ["aaa", "bbb", "ccc"], + ["xxx", "yyy", "zzz"] ] + ``` + +2. 
Though it is RECOMMENDED, the last record in a file is not required to have a + ending line break. + + CSV: + + ```csv + aaa,bbb,ccc¬ + xxx,yyy,zzz + ``` + + JSON: + + ```json + [ ["aaa", "bbb", "ccc"], + ["xxx", "yyy", "zzz"] ] + ``` + +3. There may be an OPTIONAL header line appearing as the first line of the file + with the same format as normal records. This header will contain names + corresponding to the fields in the file, and MUST contain the same number of + fields as the records in the rest of the file. + + CSV: + + ```csv + field_1,field_2,field_3¬ + aaa,bbb,ccc¬ + xxx,yyy,zzz¬ + ``` + + JSON (ignoring headers): + + ```json + [ ["field_1", "field_2", "field_3"], + ["aaa", "bbb", "ccc"], + ["xxx", "yyy", "zzz"] ] + ``` + + JSON (using headers): + + ```json + [ {"field_1": "aaa", "field_2": "bbb", "field_3": "ccc"}, + {"field_1": "xxx", "field_2": "yyy", "field_3": "zzz"} ] + ``` + +4. Within each record and the OPTIONAL header, there may be one or more fields, + separated by a delimiter (normally a comma). Each record MUST contain the + same number of fields throughout the file. + + CSV (invalid): + + ```csv + aaa,bbb,ccc¬ + 111,222,333,444¬ + xxx,yyy,zzz¬ + ``` + +5. The last field in a record MUST NOT be followed by a comma. This results in a + additional field with nothing in it. + + CSV: + + ```csv + aaa,bbb,ccc,¬ + xxx,yyy,zzz,¬ + ``` + + JSON: + + ```json + [ ["aaa", "bbb", "ccc", ""], + ["xxx", "yyy", "zzz", ""] ] + ``` + +6. Spaces are considered part of a field and MUST NOT be ignored. + + CSV: + + ```csv + aaa , bbb , ccc¬ + xxx, yyy ,zzz ¬ + ``` + + JSON: + + ```json + [ ["aaa ", " bbb ", " ccc"], + [" xxx", " yyy ", "zzz "] ] + ``` + +7. Fields containing line breaks (CRLF, LF, or CR), double quotes, or the + delimiter character (normally a comma) MUST be enclosed in double-quotes. + + CSV: + + ```csv + aaa,"b¬ + bb",ccc¬ + xxx,"y, yy",zzz¬ + ``` + + JSON: + + ```json + [ ["aaa", "b\r\nbb", "ccc"], + ["xxx", "y, yy", "zzz"] ] + ``` + +8. 
A double-quote appearing inside a field MUST be escaped by preceding it with + another double quote, and the field itself MUST be enclosed in double quotes. + + CSV: + + ```csv + aaa,"b""bb",ccc¬ + ``` + + JSON: + + ```json + [ ["aaa", "b\"bb", "ccc"] ] + ``` + +9. When a field enclosed in double quotes has spaces before and/or after the + double quotes, the spaces MUST be ignored, as the field starts and ends with + the double quotes. However this is considered invalid formatting and the CSV + parser SHOULD report some form of warning message. + + CSV: + + ```csv + aaa,bbb,ccc¬ + xxx, "y, yy" ,zzz¬ + ``` + + JSON: + + ```json + [ ["aaa", "bbb", "ccc"], + ["xxx", "y, yy", "zzz"] ] + ``` + +10. It is possible to enclose every field in double quotes even if they don't + need to be enclosed. However it is RECOMMENDED to only enclose fields in + double quotes that requires it. + + CSV: + + ```csv + "aaa","bbb","ccc"¬ + "xxx",yyy,zzz¬ + ``` + + JSON: + + ```json + [ ["aaa", "bbb", "ccc"], + ["xxx", "yyy", "zzz"] ] + ``` + +11. All fields are always strings. CSV itself does not support type casting to + integers, floats, booleans, or anything else. It is not a CSV library's + responsibility to type cast input CSV data. + + If type casting is required, it is up to the developer using a specific CSV + library to ensure types are correctly dealt with. + + Input JSON: + + ```json + [ [10, true, 0.3, null, "aaa"], + [11, false, 2.13, "", "bbb"] ] + ``` + + Output CSV: + + ```csv + 10,true,0.3,,aaa¬ + 11,false,2.13,,bbb¬ + ``` + + Output CSV parsed back to JSON: + + ```json + [ ["10", "true", "0.3", "", "aaa"], + ["11", "false", "2.13", "", "bbb"] ] + ``` + + At this point it is up to the developer themselves to type cast the above + output data from the CSV parser. + +12. However, when rendering type cast input data to CSV text, non-string types + MUST be converted to a string in such a way that minimal information is + lost. 
+ - Integers and floats MUST be rendered as a string version of themselves. + - Booleans `true` and `false` MUST be rendered as `true` and `false` + strings, not as `1` or `0` numbers. If numbers are used the resulting + CSV data is indistinguishable from actual integer numbers. + - `Null`/`nil` values MUST be rendered as empty strings. + +13. When parsing input CSV data all forms of line breaks (CRLF, LF, and CR) MUST + be supported. +14. When rendering output CSV data, CRLF MUST be used for line breaks to ensure + maximum cross-platform compatibility. + +About +----- + +This CSV specification is authored by [Jim Myhrberg](https://jimeh.me/). + +If you'd like to leave feedback, +please [open an issue on GitHub](https://github.com/parsecsv/csv-spec/issues). + +License +------- + +[CC0 1.0 Universal](http://creativecommons.org/publicdomain/zero/1.0/) + diff --git a/spec/0.9.0-draft.1.md b/spec/0.9.0-draft.1.md new file mode 100644 index 0000000..e2a314d --- /dev/null +++ b/spec/0.9.0-draft.1.md @@ -0,0 +1,312 @@ +--- +title: CSV Spec 0.9.0-draft.1 +version: 0.9.0-draft.1 +--- +CSV Spec 0.9.0-draft.1 +==================== + +Summary +------- + +CSV is not a file format, it is a loose set of guidelines of how to structure +tabular data into a plain text string. As such there's an endless amount of +`*.csv` files floating around which are highly incompatible with each other. The +closest thing there is to a specification is [RFC +4180](http://tools.ietf.org/html/rfc4180). + +Goals +----- + +This project is an attempt to summarize RFC 4180 and the information in the +[Comma-separated values +(CSV)](http://en.wikipedia.org/wiki/Comma-separated_values) Wikipedia article +into a easy to understand format. The spec will also take into account that the +comma (`,`) character is not the only character used as a field +delimiter. Semi-colons (`;`), tabs (`\t`), and more are popular field delimiter +characters. 
As such the specification will more accurately be describing a +CSV-like structured data format. + +We will also provide input/output test files that CSV parser/writer software +libraries can use to validate if they properly adhere to the rules laid out in +this specification. And if possible we will even try to provide code snippets in +various languages that attempts to automatically determine the delimiter +character used in any given input CSV-like formatted file/data. + +Roadmap +------- + +1. Write up core specification rules. _[in-progress]_ +2. Create input/output test files covering all rules in the specification. +3. Create website for [csv-spec.org](http://csv-spec.org/). +4. Create linting tool as a NPM module, allowing easy validation of CSV data + both client-side in a web browser, and server side via a command line tool. +5. Create automatic delimiter character detection code snippets in various + programming languages which CSV parser developers can freely use to enhance + their libraries. + +Terminology +----------- + +- **Field** — A singular String value within a record. +- **Record** (or **Row**) — A collection of fields. This is often referred to as + a "line", but a single record can span multiple text lines if a field within + it contains one or more line breaks. +- **Delimiter** — The character used to separate fields withing a row. Commonly + this will be a comma (`,`), but semi-colons (`;`) or tabs (`\t`) are two other + popular delimiter characters. +- **Header** — The first row is often used to contain the column names for all + remaining rows. Header names would be used as key names when CSV data is + converted to JSON for example. +- **Line Break** — Line breaks in CSV files can be CRLF (`\r\n`), LF (`\n`), and + even in rare cases CR (`\r`). +- **LF, CR, and CRLF** — Different types of line breaks, typically determined by + the OS. Linux, OSX, and other *NIX operating systems generally use a line feed + (LF or `\n`) character. 
Windows uses a carriage return (CR or `\r`) and a line + feed character, effectively "CRLF" (`\r\n`). + +CSV Format Specification +------------------------ + +The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", +"SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be +interpreted as described in [RFC 2119](https://tools.ietf.org/html/rfc2119). + +These rules are mostly based on the corresponding section from [RFC +4180](http://tools.ietf.org/html/rfc4180#section-2), with minor changes, +clarifications and improved examples. + +1. Each record starts at the beginning of its own line, and ends with a line + break (shown as `¬`). + + CSV: + + ```csv + aaa,bbb,ccc¬ + xxx,yyy,zzz¬ + ``` + + JSON: + + ```json + [ ["aaa", "bbb", "ccc"], + ["xxx", "yyy", "zzz"] ] + ``` + +2. Though it is RECOMMENDED, the last record in a file is not required to have a + ending line break. + + CSV: + + ```csv + aaa,bbb,ccc¬ + xxx,yyy,zzz + ``` + + JSON: + + ```json + [ ["aaa", "bbb", "ccc"], + ["xxx", "yyy", "zzz"] ] + ``` + +3. There may be an OPTIONAL header line appearing as the first line of the file + with the same format as normal records. This header will contain names + corresponding to the fields in the file, and MUST contain the same number of + fields as the records in the rest of the file. + + CSV: + + ```csv + field_1,field_2,field_3¬ + aaa,bbb,ccc¬ + xxx,yyy,zzz¬ + ``` + + JSON (ignoring headers): + + ```json + [ ["field_1", "field_2", "field_3"], + ["aaa", "bbb", "ccc"], + ["xxx", "yyy", "zzz"] ] + ``` + + JSON (using headers): + + ```json + [ {"field_1": "aaa", "field_2": "bbb", "field_3": "ccc"}, + {"field_1": "xxx", "field_2": "yyy", "field_3": "zzz"} ] + ``` + +4. Within each record and the OPTIONAL header, there may be one or more fields, + separated by a delimiter (normally a comma). Each record MUST contain the + same number of fields throughout the file. 
+ + CSV (invalid): + + ```csv + aaa,bbb,ccc¬ + 111,222,333,444¬ + xxx,yyy,zzz¬ + ``` + +5. The last field in a record MUST NOT be followed by a comma. This results in a + additional field with nothing in it. + + CSV: + + ```csv + aaa,bbb,ccc,¬ + xxx,yyy,zzz,¬ + ``` + + JSON: + + ```json + [ ["aaa", "bbb", "ccc", ""], + ["xxx", "yyy", "zzz", ""] ] + ``` + +6. Spaces are considered part of a field and MUST NOT be ignored. + + CSV: + + ```csv + aaa , bbb , ccc¬ + xxx, yyy ,zzz ¬ + ``` + + JSON: + + ```json + [ ["aaa ", " bbb ", " ccc"], + [" xxx", " yyy ", "zzz "] ] + ``` + +7. Fields containing line breaks (CRLF, LF, or CR), double quotes, or the + delimiter character (normally a comma) MUST be enclosed in double-quotes. + + CSV: + + ```csv + aaa,"b¬ + bb",ccc¬ + xxx,"y, yy",zzz¬ + ``` + + JSON: + + ```json + [ ["aaa", "b\r\nbb", "ccc"], + ["xxx", "y, yy", "zzz"] ] + ``` + +8. A double-quote appearing inside a field MUST be escaped by preceding it with + another double quote, and the field itself MUST be enclosed in double quotes. + + CSV: + + ```csv + aaa,"b""bb",ccc¬ + ``` + + JSON: + + ```json + [ ["aaa", "b\"bb", "ccc"] ] + ``` + +9. When a field enclosed in double quotes has spaces before and/or after the + double quotes, the spaces MUST be ignored, as the field starts and ends with + the double quotes. However this is considered invalid formatting and the CSV + parser SHOULD report some form of warning message. + + CSV: + + ```csv + aaa,bbb,ccc¬ + xxx, "y, yy" ,zzz¬ + ``` + + JSON: + + ```json + [ ["aaa", "bbb", "ccc"], + ["xxx", "y, yy", "zzz"] ] + ``` + +10. It is possible to enclose every field in double quotes even if they don't + need to be enclosed. However it is RECOMMENDED to only enclose fields in + double quotes that requires it. + + CSV: + + ```csv + "aaa","bbb","ccc"¬ + "xxx",yyy,zzz¬ + ``` + + JSON: + + ```json + [ ["aaa", "bbb", "ccc"], + ["xxx", "yyy", "zzz"] ] + ``` + +11. All fields are always strings. 
CSV itself does not support type casting to + integers, floats, booleans, or anything else. It is not a CSV library's + responsibility to type cast input CSV data. + + If type casting is required, it is up to the developer using a specific CSV + library to ensure types are correctly dealt with. + + Input JSON: + + ```json + [ [10, true, 0.3, null, "aaa"], + [11, false, 2.13, "", "bbb"] ] + ``` + + Output CSV: + + ```csv + 10,true,0.3,,aaa¬ + 11,false,2.13,,bbb¬ + ``` + + Output CSV parsed back to JSON: + + ```json + [ ["10", "true", "0.3", "", "aaa"], + ["11", "false", "2.13", "", "bbb"] ] + ``` + + At this point it is up to the developer themselves to type cast the above + output data from the CSV parser. + +12. However, when rendering type cast input data to CSV text, non-string types + MUST be converted to a string in such a way that minimal information is + lost. + - Integers and floats MUST be rendered as a string version of themselves. + - Booleans `true` and `false` MUST be rendered as `true` and `false` + strings, not as `1` or `0` numbers. If numbers are used the resulting + CSV data is indistinguishable from actual integer numbers. + - `Null`/`nil` values MUST be rendered as empty strings. + +13. When parsing input CSV data all forms of line breaks (CRLF, LF, and CR) MUST + be supported. +14. When rendering output CSV data, CRLF MUST be used for line breaks to ensure + maximum cross-platform compatibility. + +About +----- + +This CSV specification is authored by [Jim Myhrberg](https://jimeh.me/). + +If you'd like to leave feedback, +please [open an issue on GitHub](https://github.com/parsecsv/csv-spec/issues). + +License +------- + +[CC0 1.0 Universal](http://creativecommons.org/publicdomain/zero/1.0/) +