Initial commit

This commit is contained in:
2017-10-08 15:27:35 +01:00
commit fd1e7465b8
26 changed files with 2093 additions and 0 deletions

5
.gitignore vendored Normal file
View File

@@ -0,0 +1,5 @@
_site
.asset-cache
.sass-cache
.jekyll-metadata
docs/assets/.sprockets-manifest-*.json

9
404.html Normal file
View File

@@ -0,0 +1,9 @@
---
title: 404 Page Not Found
sitemap: false
---
<div class="header">
<h1>404</h1>
<p><strong>Page not found :(</strong></p>
<p>The requested page could not be found.</p>
</div>

1
CNAME Normal file
View File

@@ -0,0 +1 @@
csv-spec.org

21
Gemfile Normal file
View File

@@ -0,0 +1,21 @@
source 'https://rubygems.org'

# The site generator itself, pinned to an exact release.
gem 'jekyll', '3.5.0'

# Tooling that is only needed while working on the site.
gem 'rake', group: :development
gem 'rubocop', group: :development

# If you have any plugins, put them here!
group :jekyll_plugins do
  gem 'jekyll-assets'
  gem 'jekyll-pants'
  gem 'jekyll-seo-tag'
  gem 'jekyll-sitemap'
  gem 'jekyll-tidy'
  gem 'uglifier' # required by 'jekyll-assets' for JS compression
end

# Windows does not include zoneinfo files, so bundle the tzinfo-data gem
gem 'tzinfo-data', platforms: %i[mingw mswin x64_mingw jruby]

102
Gemfile.lock Normal file
View File

@@ -0,0 +1,102 @@
GEM
remote: https://rubygems.org/
specs:
addressable (2.5.1)
public_suffix (~> 2.0, >= 2.0.2)
ast (2.3.0)
colorator (1.1.0)
concurrent-ruby (1.0.5)
execjs (2.7.0)
extras (0.3.0)
forwardable-extended (~> 2.5)
fastimage (2.1.0)
ffi (1.9.18)
forwardable-extended (2.6.0)
htmlbeautifier (1.3.1)
htmlcompressor (0.3.1)
jekyll (3.5.0)
addressable (~> 2.4)
colorator (~> 1.0)
jekyll-sass-converter (~> 1.0)
jekyll-watch (~> 1.1)
kramdown (~> 1.3)
liquid (~> 4.0)
mercenary (~> 0.3.3)
pathutil (~> 0.9)
rouge (~> 1.7)
safe_yaml (~> 1.0)
jekyll-assets (2.3.2)
concurrent-ruby (~> 1.0)
extras (~> 0.2)
fastimage (~> 2.0, >= 1.8)
jekyll (~> 3.1, >= 3.0)
pathutil (>= 0.8)
rack (~> 1.6)
sprockets (~> 3.3, < 3.8)
jekyll-pants (0.2.1)
rubypants
jekyll-sass-converter (1.5.0)
sass (~> 3.4)
jekyll-seo-tag (2.2.3)
jekyll (~> 3.3)
jekyll-sitemap (1.0.0)
jekyll (~> 3.3)
jekyll-tidy (0.2.2)
htmlbeautifier
htmlcompressor
jekyll
jekyll-watch (1.5.0)
listen (~> 3.0, < 3.1)
kramdown (1.14.0)
liquid (4.0.0)
listen (3.0.8)
rb-fsevent (~> 0.9, >= 0.9.4)
rb-inotify (~> 0.9, >= 0.9.7)
mercenary (0.3.6)
parser (2.4.0.0)
ast (~> 2.2)
pathutil (0.14.0)
forwardable-extended (~> 2.6)
powerpack (0.1.1)
public_suffix (2.0.5)
rack (1.6.8)
rainbow (2.2.1)
rake (12.0.0)
rb-fsevent (0.10.2)
rb-inotify (0.9.10)
ffi (>= 0.5.0, < 2)
rouge (1.11.1)
rubocop (0.47.1)
parser (>= 2.3.3.1, < 3.0)
powerpack (~> 0.1)
rainbow (>= 1.99.1, < 3.0)
ruby-progressbar (~> 1.7)
unicode-display_width (~> 1.0, >= 1.0.1)
ruby-progressbar (1.8.1)
rubypants (0.6.0)
safe_yaml (1.0.4)
sass (3.4.25)
sprockets (3.7.1)
concurrent-ruby (~> 1.0)
rack (> 1, < 3)
uglifier (3.2.0)
execjs (>= 0.3.0, < 3)
unicode-display_width (1.3.0)
PLATFORMS
ruby
DEPENDENCIES
jekyll (= 3.5.0)
jekyll-assets
jekyll-pants
jekyll-seo-tag
jekyll-sitemap
jekyll-tidy
rake
rubocop
tzinfo-data
uglifier
BUNDLED WITH
1.14.6

5
README.md Normal file
View File

@@ -0,0 +1,5 @@
# csv-spec.org
This is the site at http://csv-spec.org/ that attempts to describe CSV-like
formats in an obvious and easy-to-understand way, complete with code examples
aimed at developers.

85
Rakefile Normal file
View File

@@ -0,0 +1,85 @@
require 'open-uri'
require 'yaml'
# `rake build`: regenerate the static site into the docs directory.
desc 'Build site into docs directory'
task(:build) { jekyll_build }
# `rake update`: wipe the spec output directory, re-download every configured
# spec version, write each one out as a page (the current version also becomes
# index.md), then rebuild the site.
desc 'Update index.md and spec folder based on versions in _config.yml'
task :update do
  site = YAML.load_file('_config.yml')
  settings = site['update']
  latest = site['current_version']
  all_versions = site['versions']

  remove_all_specs(settings)

  puts ''
  puts 'Fetching configured spec versions:'
  all_versions.each do |version|
    spec = fetch_spec(version, settings)

    # The current version doubles as the site's landing page.
    write_file('index.md', spec[:body], " (#{version})") if latest == version

    base_path = File.join(settings['output_dir'], version)
    write_file("#{base_path}.md", spec[:body])
    write_file("#{base_path}.svg", spec[:diagram]) if spec[:diagram]
  end

  jekyll_build
end
# Announces the rebuild and hands control over to Jekyll.
#
# `exec` replaces the current process, so nothing after this call runs.
def jekyll_build
  puts 'Rebuilding output into docs directory...'
  command = 'jekyll build --destination docs && touch docs/.nojekyll'
  exec(command)
end
# Logs the file being written and then writes it.
#
# file    - path of the file to (over)write.
# content - data handed to File.write.
# comment - optional suffix appended to the log line only.
#
# Returns whatever File.write returns (the number of bytes written).
def write_file(file, content, comment = nil)
  log_line = " - #{file}#{comment}"
  puts log_line
  File.write(file, content)
end
# Downloads one version of the spec and wraps it in the page template.
#
# version - String version name substituted into the download URL and template.
# config  - Hash holding the update settings. Reads `files`, `body_tpl` and,
#           when a diagram file is configured, `img_tpl`.
#
# Returns a Hash with :version, :title (first line of the document), :body
# (the rendered page source) and :diagram (diagram contents, or nil).
# Raises RuntimeError when the spec document itself cannot be downloaded.
def fetch_spec(version, config)
  document_url = build_file_url('document', version, config)
  document = get(document_url)
  # `get` yields nil on HTTP errors; fail loudly here instead of with a
  # NoMethodError on nil further down.
  raise "Could not fetch spec #{version} from #{document_url}" unless document

  diagram = nil
  if config['files']['diagram']
    diagram = get(build_file_url('diagram', version, config))
    # Only reference the image when it was actually downloaded.
    if diagram
      # Block forms insert the replacement text literally; the plain-string
      # form would interpret backslash sequences inside it.
      img_tag = config['img_tpl'].gsub('{{file}}') { "#{version}.svg" }
      # Place the image directly below the underlined title ("Title\n====\n").
      document = document.sub(/\A(.*\n=+\n)/) do
        "#{Regexp.last_match(1)}\n#{img_tag}\n"
      end
    end
  end

  title = document.split("\n", 2).first.to_s

  # Single pass with a Hash replacement: values are inserted verbatim (no
  # backslash interpretation) and the inserted document is never re-scanned,
  # so placeholders that appear inside the spec text itself are left alone.
  replacements = {
    '{{content}}' => document,
    '{{title}}' => title,
    '{{version}}' => version
  }
  body = config['body_tpl'].gsub(/\{\{(?:content|title|version)\}\}/, replacements)

  {
    version: version,
    title: title,
    body: body,
    diagram: diagram
  }
end
# Expands the URL template for one downloadable file of a spec version.
#
# file    - key into config['files'] naming the remote file.
# version - String substituted for the {{version}} placeholder.
# config  - Hash providing 'url_tpl' and 'files'.
#
# Returns the expanded URL String.
def build_file_url(file, version, config)
  versioned = config['url_tpl'].gsub('{{version}}', version)
  remote_name = config['files'][file]
  versioned.gsub('{{file}}', remote_name)
end
# Fetches the body of a remote URL via open-uri.
#
# Returns the response body, or nil when the server answers with an HTTP
# error (OpenURI::HTTPError). Other failures propagate to the caller.
def get(url)
  remote = URI.parse(url)
  remote.read
rescue OpenURI::HTTPError
  nil
end
# Deletes every entry directly inside the configured spec output directory.
#
# config - Hash whose 'output_dir' names the directory to empty.
#
# Raises ArgumentError when 'output_dir' is missing or blank; without this
# guard the glob below would expand to "/*" and target the filesystem root.
def remove_all_specs(config)
  output_dir = config['output_dir'].to_s
  raise ArgumentError, 'output_dir must be configured' if output_dir.strip.empty?

  puts ''
  puts 'Removing existing spec files:'
  # Strip the Rakefile's directory only when it is a leading prefix. A plain
  # substring replace would, for a dirname of ".", remove every dot from the
  # displayed file name.
  root_prefix = /\A#{Regexp.escape(File.dirname(__FILE__))}/
  Dir["#{output_dir}/*"].each do |file|
    puts " #{file.sub(root_prefix, '')}"
    File.delete(file)
  end
end

90
_assets/css/_base.scss Normal file
View File

@@ -0,0 +1,90 @@
html {
height: 100%;
}
body {
font-family: 'Open Sans', Helvetica, Arial, sans-serif;
font-size: 16px;
font-weight: 400;
line-height: 1.5;
color: #1a1a1a;
background-color: #fdfdfd;
}
h1, h2, h3, h4, h5, h6 {
font-family: 'Open Sans Condensed', Helvetica, Arial, sans-serif;
font-weight: 700;
color: #333;
}
h1 {
font-size: 2.5em;
line-height: 1.2;
}
ol ol, ul ol {
list-style-type: lower-roman;
}
ul ul ol, ul ol ol, ol ul ol, ol ol ol {
list-style-type: lower-alpha;
}
// Typography and code styling for the main content column.
.content {
  margin-top: 80px;

  a {
    word-break: break-word;
  }

  // Inline code snippets.
  code {
    background-color: rgba(27,31,35,0.05);
    border-radius: 3px;
    font-family: "SFMono-Regular", Consolas, "Liberation Mono", Menlo, Courier, monospace;
    font-size: 85%;
    margin: 0;
    padding: 0.3em 0.4em 0.1em 0.4em;
  }

  // Fenced / highlighted code blocks.
  pre {
    background-color: #f6f8fa;
    border-radius: 3px;
    line-height: 1.45;
    padding: 16px;
  }

  // Code inside a block must not repeat the inline-code chrome.
  pre > code {
    background-color: transparent !important;
    // `none` is not a valid border-radius value and the declaration was
    // dropped by browsers; `0` actually resets the radius set on `code`.
    border-radius: 0;
    font-size: 90%;
    padding: 0;
  }
}
#menu {
.pure-menu-label {
color: #999;
border: none;
padding: 0.6em 0 0.6em 0.6em;
}
.links {
font-size: 50px;
position: absolute;
bottom: 10px;
left: 0px;
right: 0px;
text-align: center;
a {
color: #555;
padding: 0;
position: relative;
text-decoration: none;
&:hover {
color: #777;
}
}
}
}

View File

@@ -0,0 +1,62 @@
/* https://github.com/jwarby/jekyll-pygments-themes/blob/22dfd74f9f10c87c0ec98876e136f02c989d43ba/github.css */
.highlight .hll { background-color: #ffffcc }
.highlight .c { color: #999988; font-style: italic } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { color: #000000; font-weight: bold } /* Keyword */
.highlight .o { color: #000000; font-weight: bold } /* Operator */
.highlight .cm { color: #999988; font-style: italic } /* Comment.Multiline */
.highlight .cp { color: #999999; font-weight: bold; font-style: italic } /* Comment.Preproc */
.highlight .c1 { color: #999988; font-style: italic } /* Comment.Single */
.highlight .cs { color: #999999; font-weight: bold; font-style: italic } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .ge { color: #000000; font-style: italic } /* Generic.Emph */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #999999 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #aaaaaa } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { color: #000000; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #000000; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #000000; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #000000; font-weight: bold } /* Keyword.Pseudo */
.highlight .kr { color: #000000; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #445588; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #009999 } /* Literal.Number */
.highlight .s { color: #d01040 } /* Literal.String */
.highlight .na { color: #008080 } /* Name.Attribute */
.highlight .nb { color: #0086B3 } /* Name.Builtin */
.highlight .nc { color: #445588; font-weight: bold } /* Name.Class */
.highlight .no { color: #008080 } /* Name.Constant */
.highlight .nd { color: #3c5d5d; font-weight: bold } /* Name.Decorator */
.highlight .ni { color: #800080 } /* Name.Entity */
.highlight .ne { color: #990000; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #990000; font-weight: bold } /* Name.Function */
.highlight .nl { color: #990000; font-weight: bold } /* Name.Label */
.highlight .nn { color: #555555 } /* Name.Namespace */
.highlight .nt { color: #000080 } /* Name.Tag */
.highlight .nv { color: #008080 } /* Name.Variable */
.highlight .ow { color: #000000; font-weight: bold } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mf { color: #009999 } /* Literal.Number.Float */
.highlight .mh { color: #009999 } /* Literal.Number.Hex */
.highlight .mi { color: #009999 } /* Literal.Number.Integer */
.highlight .mo { color: #009999 } /* Literal.Number.Oct */
.highlight .sb { color: #d01040 } /* Literal.String.Backtick */
.highlight .sc { color: #d01040 } /* Literal.String.Char */
.highlight .sd { color: #d01040 } /* Literal.String.Doc */
.highlight .s2 { color: #d01040 } /* Literal.String.Double */
.highlight .se { color: #d01040 } /* Literal.String.Escape */
.highlight .sh { color: #d01040 } /* Literal.String.Heredoc */
.highlight .si { color: #d01040 } /* Literal.String.Interpol */
.highlight .sx { color: #d01040 } /* Literal.String.Other */
.highlight .sr { color: #009926 } /* Literal.String.Regex */
.highlight .s1 { color: #d01040 } /* Literal.String.Single */
.highlight .ss { color: #990073 } /* Literal.String.Symbol */
.highlight .bp { color: #999999 } /* Name.Builtin.Pseudo */
.highlight .vc { color: #008080 } /* Name.Variable.Class */
.highlight .vg { color: #008080 } /* Name.Variable.Global */
.highlight .vi { color: #008080 } /* Name.Variable.Instance */
.highlight .il { color: #009999 } /* Literal.Number.Integer.Long */

248
_assets/css/_side-menu.scss Normal file
View File

@@ -0,0 +1,248 @@
body {
color: #777;
}
.pure-img-responsive {
max-width: 100%;
height: auto;
}
/*
Add transition to containers so they can push in and out.
*/
#layout,
#menu,
.menu-link {
-webkit-transition: all 0.2s ease-out;
-moz-transition: all 0.2s ease-out;
-ms-transition: all 0.2s ease-out;
-o-transition: all 0.2s ease-out;
transition: all 0.2s ease-out;
}
/*
This is the parent `<div>` that contains the menu and the content area.
*/
#layout {
position: relative;
left: 0;
padding-left: 0;
}
#layout.active #menu {
left: 150px;
width: 150px;
}
#layout.active .menu-link {
left: 150px;
}
/*
The content `<div>` is where all your content goes.
*/
.content {
margin: 0 auto;
padding: 0 2em;
max-width: 800px;
margin-bottom: 50px;
line-height: 1.6em;
}
.header {
margin: 0;
color: #333;
text-align: center;
padding: 2.5em 2em 0;
border-bottom: 1px solid #eee;
}
.header h1 {
margin: 0.2em 0;
font-size: 3em;
font-weight: 300;
}
.header h2 {
font-weight: 300;
color: #ccc;
padding: 0;
margin-top: 0;
}
.content-subhead {
margin: 50px 0 20px 0;
font-weight: 300;
color: #888;
}
/*
The `#menu` `<div>` is the parent `<div>` that contains the `.pure-menu` that
appears on the left side of the page.
*/
#menu {
margin-left: -150px; /* "#menu" width */
width: 150px;
position: fixed;
top: 0;
left: 0;
bottom: 0;
z-index: 1000; /* so the menu or its navicon stays above all content */
background: #191818;
overflow-y: auto;
-webkit-overflow-scrolling: touch;
}
/*
All anchors inside the menu should be styled like this.
*/
#menu a {
color: #999;
border: none;
padding: 0.6em 0 0.6em 0.6em;
}
/*
Remove all background/borders, since we are applying them to #menu.
*/
#menu .pure-menu,
#menu .pure-menu ul {
border: none;
background: transparent;
}
/*
Add that light border to separate items into groups.
*/
#menu .pure-menu ul,
#menu .pure-menu .menu-item-divided {
border-top: 1px solid #333;
}
/*
Change color of the anchor links on hover/focus.
*/
#menu .pure-menu li a:hover,
#menu .pure-menu li a:focus {
background: #333;
}
/*
This styles the selected menu item `<li>`.
*/
#menu .pure-menu-selected,
#menu .pure-menu-heading {
background: #1f8dd6;
}
/*
This styles a link within a selected menu item `<li>`.
*/
#menu .pure-menu-selected a {
color: #fff;
}
/*
This styles the menu heading.
*/
#menu .pure-menu-heading {
font-size: 110%;
color: #fff;
margin: 0;
}
/* -- Dynamic Button For Responsive Menu -------------------------------------*/
/*
The button to open/close the Menu is custom-made and not part of Pure. Here's
how it works:
*/
/*
`.menu-link` represents the responsive menu toggle that shows/hides on
small screens.
*/
.menu-link {
position: fixed;
display: block; /* show this only on small screens */
top: 0;
left: 0; /* "#menu width" */
background: #000;
background: rgba(0,0,0,0.7);
font-size: 10px; /* change this value to increase/decrease button size */
z-index: 10;
width: 2em;
height: auto;
padding: 2.1em 1.6em;
}
.menu-link:hover,
.menu-link:focus {
background: #000;
}
.menu-link span {
position: relative;
display: block;
}
.menu-link span,
.menu-link span:before,
.menu-link span:after {
background-color: #fff;
width: 100%;
height: 0.2em;
}
.menu-link span:before,
.menu-link span:after {
position: absolute;
margin-top: -0.6em;
content: " ";
}
.menu-link span:after {
margin-top: 0.6em;
}
/* -- Responsive Styles (Media Queries) ------------------------------------- */
/*
Hides the menu at `48em`, but modify this based on your app's needs.
*/
@media (min-width: 48em) {
.header,
.content {
padding-left: 2em;
padding-right: 2em;
}
#layout {
padding-left: 150px; /* left col width "#menu" */
left: 0;
}
#menu {
left: 150px;
}
.menu-link {
position: fixed;
left: 150px;
display: none;
}
#layout.active .menu-link {
left: 150px;
}
}
@media (max-width: 48em) {
/* Only apply this when the window is small. Otherwise, the following
case results in extra padding on the left:
* Make the window small.
* Tap the menu to trigger the active state.
* Make the window large again.
*/
#layout.active {
position: relative;
left: 150px;
}
}

3
_assets/css/main.scss Normal file
View File

@@ -0,0 +1,3 @@
@import "side-menu";
@import "highlight";
@import "base";

1
_assets/js/main.js Normal file
View File

@@ -0,0 +1 @@
// = require ui

44
_assets/js/ui.js Normal file
View File

@@ -0,0 +1,44 @@
/**
 * Responsive side-menu behaviour: clicking the menu link toggles the
 * "active" class on the layout, the menu and the link itself; clicking the
 * main content while the menu is active closes it again.
 */
(function (window, document) {
  var layout = document.getElementById('layout');
  var menu = document.getElementById('menu');
  var menuLink = document.getElementById('menuLink');
  var content = document.getElementById('main');

  /**
   * Removes the first occurrence of `className` from the element's class
   * list, or appends it when it is not present.
   */
  function toggleClass (element, className) {
    var names = element.className.split(/\s+/);
    var found = -1;
    var i;

    for (i = 0; i < names.length; i++) {
      if (names[i] === className) {
        found = i;
        break;
      }
    }

    if (found === -1) {
      names.push(className);
    } else {
      names.splice(found, 1);
    }

    element.className = names.join(' ');
  }

  /**
   * Cancels the event's default action and flips the "active" state on all
   * three elements together.
   */
  function toggleAll (e) {
    var active = 'active';

    e.preventDefault();
    toggleClass(layout, active);
    toggleClass(menu, active);
    toggleClass(menuLink, active);
  }

  menuLink.onclick = toggleAll;

  content.onclick = function (e) {
    // Nothing to close unless the menu currently carries "active".
    if (menu.className.indexOf('active') === -1) {
      return;
    }
    toggleAll(e);
  };
}(this, this.document));

50
_config.yml Normal file
View File

@@ -0,0 +1,50 @@
title: CSV Spec
description: >
An attempt to describe CSV-like formats in an obvious and easy to understand
way, complete with code examples aimed at developers.
author: Jim Myhrberg
hostname: csv-spec.org
url: https://csv-spec.org
repo_url: https://github.com/parsecsv/csv-spec
current_version: 0.9.0-draft.1
versions:
- 0.9.0-draft.1
exclude:
- Gemfile
- Gemfile.lock
- Rakefile
- README.md
update:
body_tpl: |
---
title: {{title}}
version: {{version}}
---
{{content}}
url_tpl: "https://github.com/parsecsv/csv-spec/raw/{{version}}/{{file}}"
output_dir: "spec"
files:
document: csv-spec.md
plugins:
- jekyll-assets
- jekyll-pants
- jekyll-sitemap
- jekyll-seo-tag
- jekyll-tidy
defaults:
-
scope:
path: ""
values:
layout: "default"
assets:
digest: true
compress:
css: true
js: true

54
_layouts/default.html Normal file
View File

@@ -0,0 +1,54 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link href='https://fonts.googleapis.com/css?family=Open+Sans+Condensed:700,300|Open+Sans:400italic,700italic,400,700' rel='stylesheet' type='text/css'>
<link rel="stylesheet" href="https://unpkg.com/purecss@1.0.0/build/pure-min.css" integrity="sha384-nn4HPE8lTHyVtfCBi5yW9d20FjT8BJwUXyWZT9InLYax14RDjBj46LmSztkmNP9w" crossorigin="anonymous">
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/font-awesome/4.7.0/css/font-awesome.min.css">
{% css main %}
{% seo %}
</head>
<body>
<div id="layout">
<a href="#menu" id="menuLink" class="menu-link">
<span></span>
</a>
<div id="menu">
<div class="pure-menu">
<ul class="pure-menu-list">
<li class="pure-menu-item">
<div class="pure-menu-label">Versions:</div>
</li>
{% for version in site.versions %}
{% assign expected = "spec/" | append: version | append: ".md" %}
{% assign found = site.pages | where: "path", expected | first %}
{% assign selected = "" %}
{% if version == page.version %}
{% assign selected = " pure-menu-selected" %}
{% endif %}
{% if found %}
<li class="pure-menu-item version-{{ version }}{{ selected }}">
<a href="{{ found.url }}" class="pure-menu-link">{{ version }}</a>
</li>
{% endif %}
{% endfor %}
</ul>
</div>
<div class="links">
<a href="{{ site.repo_url }}">
<i class="fa fa-github" aria-hidden="true"></i>
</a>
</div>
</div>
<div id="main">
<div class="content">
{{ content }}
</div>
</div>
</div>
{% js main %}
</body>
</html>

0
docs/.nojekyll Normal file
View File

60
docs/404.html Normal file
View File

@@ -0,0 +1,60 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link href='https://fonts.googleapis.com/css?family=Open+Sans+Condensed:700,300|Open+Sans:400italic,700italic,400,700' rel='stylesheet' type='text/css'>
<link rel="stylesheet" href="https://unpkg.com/purecss@1.0.0/build/pure-min.css" integrity="sha384-nn4HPE8lTHyVtfCBi5yW9d20FjT8BJwUXyWZT9InLYax14RDjBj46LmSztkmNP9w" crossorigin="anonymous">
<link rel="stylesheet" href="https:////maxcdn.bootstrapcdn.com/font-awesome/4.7.0/css/font-awesome.min.css">
<link type="text/css" rel="stylesheet" href="/assets/main-5df19fc13b2a391dcab974f0584820f95eecdd416e60b99950f2058c61a2e99f.css">
<!-- Begin Jekyll SEO tag v2.2.3 -->
<title>404 Page Not Found | CSV Spec</title>
<meta property="og:title" content="404 Page Not Found" />
<meta name="author" content="Jim Myhrberg" />
<meta property="og:locale" content="en_US" />
<meta name="description" content="An attempt to describe CSV-like formats in a obvious and easy to understand way, complete with code examples aimed at developers." />
<meta property="og:description" content="An attempt to describe CSV-like formats in a obvious and easy to understand way, complete with code examples aimed at developers." />
<link rel="canonical" href="https://csv-spec.org/404.html" />
<meta property="og:url" content="https://csv-spec.org/404.html" />
<meta property="og:site_name" content="CSV Spec" />
<script type="application/ld+json">
{"@context":"http://schema.org","@type":"WebPage","headline":"404 Page Not Found","author":{"@type":"Person","name":"Jim Myhrberg"},"description":"An attempt to describe CSV-like formats in a obvious and easy to understand way, complete with code examples aimed at developers.","url":"https://csv-spec.org/404.html"}
</script>
<!-- End Jekyll SEO tag -->
</head>
<body>
<div id="layout">
<a href="#menu" id="menuLink" class="menu-link">
<span></span>
</a>
<div id="menu">
<div class="pure-menu">
<ul class="pure-menu-list">
<li class="pure-menu-item">
<div class="pure-menu-label">Versions:</div>
</li>
<li class="pure-menu-item version-0.9.0-draft.1">
<a href="/spec/0.9.0-draft.1.html" class="pure-menu-link">0.9.0-draft.1</a>
</li>
</ul>
</div>
<div class="links">
<a href="https://github.com/parsecsv/csv-spec">
<i class="fa fa-github" aria-hidden="true"></i>
</a>
</div>
</div>
<div id="main">
<div class="content">
<div class="header">
<h1>404</h1>
<p><strong>Page not found :(</strong></p>
<p>The requested page could not be found.</p>
</div>
</div>
</div>
</div>
<script type="text/javascript" src="/assets/main-870855580c69dec57be4c965d0cf8afe78afa6b7b6f6bdb5aff91ac0256c0a1a.js"></script>
</body>
</html>

1
docs/CNAME Normal file
View File

@@ -0,0 +1 @@
csv-spec.org

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1 @@
!function(e,n){function t(e,n){for(var t=e.className.split(/\s+/),i=t.length,c=0;c<i;c++)if(t[c]===n){t.splice(c,1);break}i===t.length&&t.push(n),e.className=t.join(" ")}function i(e){var n="active";e.preventDefault(),t(c,n),t(a,n),t(l,n)}var c=n.getElementById("layout"),a=n.getElementById("menu"),l=n.getElementById("menuLink"),m=n.getElementById("main");l.onclick=function(e){i(e)},m.onclick=function(e){-1!==a.className.indexOf("active")&&i(e)}}(0,this.document);

308
docs/index.html Normal file
View File

@@ -0,0 +1,308 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link href='https://fonts.googleapis.com/css?family=Open+Sans+Condensed:700,300|Open+Sans:400italic,700italic,400,700' rel='stylesheet' type='text/css'>
<link rel="stylesheet" href="https://unpkg.com/purecss@1.0.0/build/pure-min.css" integrity="sha384-nn4HPE8lTHyVtfCBi5yW9d20FjT8BJwUXyWZT9InLYax14RDjBj46LmSztkmNP9w" crossorigin="anonymous">
<link rel="stylesheet" href="https:////maxcdn.bootstrapcdn.com/font-awesome/4.7.0/css/font-awesome.min.css">
<link type="text/css" rel="stylesheet" href="/assets/main-5df19fc13b2a391dcab974f0584820f95eecdd416e60b99950f2058c61a2e99f.css">
<!-- Begin Jekyll SEO tag v2.2.3 -->
<title>CSV Spec 0.9.0-draft.1 | CSV Spec</title>
<meta property="og:title" content="CSV Spec 0.9.0-draft.1" />
<meta name="author" content="Jim Myhrberg" />
<meta property="og:locale" content="en_US" />
<meta name="description" content="An attempt to describe CSV-like formats in a obvious and easy to understand way, complete with code examples aimed at developers." />
<meta property="og:description" content="An attempt to describe CSV-like formats in a obvious and easy to understand way, complete with code examples aimed at developers." />
<link rel="canonical" href="https://csv-spec.org/" />
<meta property="og:url" content="https://csv-spec.org/" />
<meta property="og:site_name" content="CSV Spec" />
<script type="application/ld+json">
{"@context":"http://schema.org","@type":"WebSite","name":"CSV Spec","headline":"CSV Spec 0.9.0-draft.1","author":{"@type":"Person","name":"Jim Myhrberg"},"description":"An attempt to describe CSV-like formats in a obvious and easy to understand way, complete with code examples aimed at developers.","url":"https://csv-spec.org/"}
</script>
<!-- End Jekyll SEO tag -->
</head>
<body>
<div id="layout">
<a href="#menu" id="menuLink" class="menu-link">
<span></span>
</a>
<div id="menu">
<div class="pure-menu">
<ul class="pure-menu-list">
<li class="pure-menu-item">
<div class="pure-menu-label">Versions:</div>
</li>
<li class="pure-menu-item version-0.9.0-draft.1 pure-menu-selected">
<a href="/spec/0.9.0-draft.1.html" class="pure-menu-link">0.9.0-draft.1</a>
</li>
</ul>
</div>
<div class="links">
<a href="https://github.com/parsecsv/csv-spec">
<i class="fa fa-github" aria-hidden="true"></i>
</a>
</div>
</div>
<div id="main">
<div class="content">
<h1 id="csv-spec-090-draft1">CSV Spec 0.9.0-draft.1</h1>
<h2 id="summary">Summary</h2>
<p>CSV is not a file format, it is a loose set of guidelines of how to structure
tabular data into a plain text string. As such there is an endless amount of
<code class="highlighter-rouge">*.csv</code> files floating around which are highly incompatible with each other. The
closest thing there is to a specification is <a href="http://tools.ietf.org/html/rfc4180">RFC
4180</a>.</p>
<h2 id="goals">Goals</h2>
<p>This project is an attempt to summarize RFC 4180 and the information in the
<a href="http://en.wikipedia.org/wiki/Comma-separated_values">Comma-separated values
(CSV)</a> Wikipedia article
into an easy to understand format. The spec will also take into account that the
comma (<code class="highlighter-rouge">,</code>) character is not the only character used as a field
delimiter. Semi-colons (<code class="highlighter-rouge">;</code>), tabs (<code class="highlighter-rouge">\t</code>), and more are popular field delimiter
characters. As such the specification will more accurately be describing a
CSV-like structured data format.</p>
<p>We will also provide input/output test files that CSV parser/writer software
libraries can use to validate if they properly adhere to the rules laid out in
this specification. And if possible we will even try to provide code snippets in
various languages that attempt to automatically determine the delimiter
character used in any given input CSV-like formatted file/data.</p>
<h2 id="roadmap">Roadmap</h2>
<ol>
<li>Write up core specification rules. <em>[in-progress]</em></li>
<li>Create input/output test files covering all rules in the specification.</li>
<li>Create website for <a href="http://csv-spec.org/">csv-spec.org</a>.</li>
<li>Create linting tool as a NPM module, allowing easy validation of CSV data
both client-side in a web browser, and server side via a command line tool.</li>
<li>Create automatic delimiter character detection code snippets in various
programming languages which CSV parser developers can freely use to enhance
their libraries.</li>
</ol>
<h2 id="terminology">Terminology</h2>
<ul>
<li><strong>Field</strong> — A singular String value within a record.</li>
<li><strong>Record</strong> (or <strong>Row</strong>) — A collection of fields. This is often referred to as
a “line”, but a single record can span multiple text lines if a field within
it contains one or more line breaks.</li>
<li><strong>Delimiter</strong> — The character used to separate fields within a row. Commonly
this will be a comma (<code class="highlighter-rouge">,</code>), but semi-colons (<code class="highlighter-rouge">;</code>) or tabs (<code class="highlighter-rouge">\t</code>) are two other
popular delimiter characters.</li>
<li><strong>Header</strong> — The first row is often used to contain the column names for all
remaining rows. Header names would be used as key names when CSV data is
converted to JSON for example.</li>
<li><strong>Line Break</strong> — Line breaks in CSV files can be CRLF (<code class="highlighter-rouge">\r\n</code>), LF (<code class="highlighter-rouge">\n</code>), and
even in rare cases CR (<code class="highlighter-rouge">\r</code>).</li>
<li><strong>LF, CR, and CRLF</strong> — Different types of line breaks, typically determined by
the OS. Linux, OSX, and other *NIX operating systems generally use a line feed
(LF or <code class="highlighter-rouge">\n</code>) character. Windows uses a carriage return (CR or <code class="highlighter-rouge">\r</code>) and a line
feed character, effectively “CRLF” (<code class="highlighter-rouge">\r\n</code>).</li>
</ul>
<h2 id="csv-format-specification">CSV Format Specification</h2>
<p>The key words “MUST”, “MUST NOT”, “REQUIRED”, “SHALL”, “SHALL NOT”, “SHOULD”,
“SHOULD NOT”, “RECOMMENDED”, “MAY”, and “OPTIONAL” in this document are to be
interpreted as described in <a href="https://tools.ietf.org/html/rfc2119">RFC 2119</a>.</p>
<p>These rules are mostly based on the corresponding section from <a href="http://tools.ietf.org/html/rfc4180#section-2">RFC
4180</a>, with minor changes,
clarifications and improved examples.</p>
<ol>
<li>
<p>Each record starts at the beginning of its own line, and ends with a line
break (shown as <code class="highlighter-rouge">¬</code>).</p>
<p>CSV:</p>
<pre><code class="language-csv">aaa,bbb,ccc¬
xxx,yyy,zzz¬
</code></pre>
<p>JSON:</p>
<div class="language-json highlighter-rouge">
<pre class="highlight"><code><span class="p">[</span><span class="w"> </span><span class="p">[</span><span class="s2">"aaa"</span><span class="p">,</span><span class="w"> </span><span class="s2">"bbb"</span><span class="p">,</span><span class="w"> </span><span class="s2">"ccc"</span><span class="p">],</span><span class="w">
</span><span class="p">[</span><span class="s2">"xxx"</span><span class="p">,</span><span class="w"> </span><span class="s2">"yyy"</span><span class="p">,</span><span class="w"> </span><span class="s2">"zzz"</span><span class="p">]</span><span class="w"> </span><span class="p">]</span><span class="w">
</span></code></pre>
</div>
</li>
<li>
<p>Though it is RECOMMENDED, the last record in a file is not required to have an
ending line break.</p>
<p>CSV:</p>
<pre><code class="language-csv">aaa,bbb,ccc¬
xxx,yyy,zzz
</code></pre>
<p>JSON:</p>
<div class="language-json highlighter-rouge">
<pre class="highlight"><code><span class="p">[</span><span class="w"> </span><span class="p">[</span><span class="s2">"aaa"</span><span class="p">,</span><span class="w"> </span><span class="s2">"bbb"</span><span class="p">,</span><span class="w"> </span><span class="s2">"ccc"</span><span class="p">],</span><span class="w">
</span><span class="p">[</span><span class="s2">"xxx"</span><span class="p">,</span><span class="w"> </span><span class="s2">"yyy"</span><span class="p">,</span><span class="w"> </span><span class="s2">"zzz"</span><span class="p">]</span><span class="w"> </span><span class="p">]</span><span class="w">
</span></code></pre>
</div>
</li>
<li>
<p>There may be an OPTIONAL header line appearing as the first line of the file
with the same format as normal records. This header will contain names
corresponding to the fields in the file, and MUST contain the same number of
fields as the records in the rest of the file.</p>
<p>CSV:</p>
<pre><code class="language-csv">field_1,field_2,field_3¬
aaa,bbb,ccc¬
xxx,yyy,zzz¬
</code></pre>
<p>JSON (ignoring headers):</p>
<div class="language-json highlighter-rouge">
<pre class="highlight"><code><span class="p">[</span><span class="w"> </span><span class="p">[</span><span class="s2">"field_1"</span><span class="p">,</span><span class="w"> </span><span class="s2">"field_2"</span><span class="p">,</span><span class="w"> </span><span class="s2">"field_3"</span><span class="p">],</span><span class="w">
</span><span class="p">[</span><span class="s2">"aaa"</span><span class="p">,</span><span class="w"> </span><span class="s2">"bbb"</span><span class="p">,</span><span class="w"> </span><span class="s2">"ccc"</span><span class="p">],</span><span class="w">
</span><span class="p">[</span><span class="s2">"xxx"</span><span class="p">,</span><span class="w"> </span><span class="s2">"yyy"</span><span class="p">,</span><span class="w"> </span><span class="s2">"zzz"</span><span class="p">]</span><span class="w"> </span><span class="p">]</span><span class="w">
</span></code></pre>
</div>
<p>JSON (using headers):</p>
<div class="language-json highlighter-rouge">
<pre class="highlight"><code><span class="p">[</span><span class="w"> </span><span class="p">{</span><span class="nt">"field_1"</span><span class="p">:</span><span class="w"> </span><span class="s2">"aaa"</span><span class="p">,</span><span class="w"> </span><span class="nt">"field_2"</span><span class="p">:</span><span class="w"> </span><span class="s2">"bbb"</span><span class="p">,</span><span class="w"> </span><span class="nt">"field_3"</span><span class="p">:</span><span class="w"> </span><span class="s2">"ccc"</span><span class="p">},</span><span class="w">
</span><span class="p">{</span><span class="nt">"field_1"</span><span class="p">:</span><span class="w"> </span><span class="s2">"xxx"</span><span class="p">,</span><span class="w"> </span><span class="nt">"field_2"</span><span class="p">:</span><span class="w"> </span><span class="s2">"yyy"</span><span class="p">,</span><span class="w"> </span><span class="nt">"field_3"</span><span class="p">:</span><span class="w"> </span><span class="s2">"zzz"</span><span class="p">}</span><span class="w"> </span><span class="p">]</span><span class="w">
</span></code></pre>
</div>
</li>
<li>
<p>Within each record and the OPTIONAL header, there may be one or more fields,
separated by a delimiter (normally a comma). Each record MUST contain the
same number of fields throughout the file.</p>
<p>CSV (invalid):</p>
<pre><code class="language-csv">aaa,bbb,ccc¬
111,222,333,444¬
xxx,yyy,zzz¬
</code></pre>
</li>
<li>
<p>The last field in a record MUST NOT be followed by a comma. This results in an
additional field with nothing in it.</p>
<p>CSV:</p>
<pre><code class="language-csv">aaa,bbb,ccc,¬
xxx,yyy,zzz,¬
</code></pre>
<p>JSON:</p>
<div class="language-json highlighter-rouge">
<pre class="highlight"><code><span class="p">[</span><span class="w"> </span><span class="p">[</span><span class="s2">"aaa"</span><span class="p">,</span><span class="w"> </span><span class="s2">"bbb"</span><span class="p">,</span><span class="w"> </span><span class="s2">"ccc"</span><span class="p">,</span><span class="w"> </span><span class="s2">""</span><span class="p">],</span><span class="w">
</span><span class="p">[</span><span class="s2">"xxx"</span><span class="p">,</span><span class="w"> </span><span class="s2">"yyy"</span><span class="p">,</span><span class="w"> </span><span class="s2">"zzz"</span><span class="p">,</span><span class="w"> </span><span class="s2">""</span><span class="p">]</span><span class="w"> </span><span class="p">]</span><span class="w">
</span></code></pre>
</div>
</li>
<li>
<p>Spaces are considered part of a field and MUST NOT be ignored.</p>
<p>CSV:</p>
<pre><code class="language-csv">aaa , bbb , ccc¬
xxx, yyy ,zzz ¬
</code></pre>
<p>JSON:</p>
<div class="language-json highlighter-rouge">
<pre class="highlight"><code><span class="p">[</span><span class="w"> </span><span class="p">[</span><span class="s2">"aaa "</span><span class="p">,</span><span class="w"> </span><span class="s2">" bbb "</span><span class="p">,</span><span class="w"> </span><span class="s2">" ccc"</span><span class="p">],</span><span class="w">
</span><span class="p">[</span><span class="s2">" xxx"</span><span class="p">,</span><span class="w"> </span><span class="s2">" yyy "</span><span class="p">,</span><span class="w"> </span><span class="s2">"zzz "</span><span class="p">]</span><span class="w"> </span><span class="p">]</span><span class="w">
</span></code></pre>
</div>
</li>
<li>
<p>Fields containing line breaks (CRLF, LF, or CR), double quotes, or the
delimiter character (normally a comma) MUST be enclosed in double-quotes.</p>
<p>CSV:</p>
<pre><code class="language-csv">aaa,"b¬
bb",ccc¬
xxx,"y, yy",zzz¬
</code></pre>
<p>JSON:</p>
<div class="language-json highlighter-rouge">
<pre class="highlight"><code><span class="p">[</span><span class="w"> </span><span class="p">[</span><span class="s2">"aaa"</span><span class="p">,</span><span class="w"> </span><span class="s2">"b\r\nbb"</span><span class="p">,</span><span class="w"> </span><span class="s2">"ccc"</span><span class="p">],</span><span class="w">
</span><span class="p">[</span><span class="s2">"xxx"</span><span class="p">,</span><span class="w"> </span><span class="s2">"y, yy"</span><span class="p">,</span><span class="w"> </span><span class="s2">"zzz"</span><span class="p">]</span><span class="w"> </span><span class="p">]</span><span class="w">
</span></code></pre>
</div>
</li>
<li>
<p>A double-quote appearing inside a field MUST be escaped by preceding it with
another double quote, and the field itself MUST be enclosed in double quotes.</p>
<p>CSV:</p>
<pre><code class="language-csv">aaa,"b""bb",ccc¬
</code></pre>
<p>JSON:</p>
<div class="language-json highlighter-rouge">
<pre class="highlight"><code><span class="p">[</span><span class="w"> </span><span class="p">[</span><span class="s2">"aaa"</span><span class="p">,</span><span class="w"> </span><span class="s2">"b\"bb"</span><span class="p">,</span><span class="w"> </span><span class="s2">"ccc"</span><span class="p">]</span><span class="w"> </span><span class="p">]</span><span class="w">
</span></code></pre>
</div>
</li>
<li>
<p>When a field enclosed in double quotes has spaces before and/or after the
double quotes, the spaces MUST be ignored, as the field starts and ends with
the double quotes. However this is considered invalid formatting and the CSV
parser SHOULD report some form of warning message.</p>
<p>CSV:</p>
<pre><code class="language-csv">aaa,bbb,ccc¬
xxx, "y, yy" ,zzz¬
</code></pre>
<p>JSON:</p>
<div class="language-json highlighter-rouge">
<pre class="highlight"><code><span class="p">[</span><span class="w"> </span><span class="p">[</span><span class="s2">"aaa"</span><span class="p">,</span><span class="w"> </span><span class="s2">"bbb"</span><span class="p">,</span><span class="w"> </span><span class="s2">"ccc"</span><span class="p">],</span><span class="w">
</span><span class="p">[</span><span class="s2">"xxx"</span><span class="p">,</span><span class="w"> </span><span class="s2">"y, yy"</span><span class="p">,</span><span class="w"> </span><span class="s2">"zzz"</span><span class="p">]</span><span class="w"> </span><span class="p">]</span><span class="w">
</span></code></pre>
</div>
</li>
<li>
<p>It is possible to enclose every field in double quotes even if they don’t
need to be enclosed. However it is RECOMMENDED to only enclose fields in
double quotes that require it.</p>
<p>CSV:</p>
<pre><code class="language-csv">"aaa","bbb","ccc"¬
"xxx",yyy,zzz¬
</code></pre>
<p>JSON:</p>
<div class="language-json highlighter-rouge">
<pre class="highlight"><code><span class="p">[</span><span class="w"> </span><span class="p">[</span><span class="s2">"aaa"</span><span class="p">,</span><span class="w"> </span><span class="s2">"bbb"</span><span class="p">,</span><span class="w"> </span><span class="s2">"ccc"</span><span class="p">],</span><span class="w">
</span><span class="p">[</span><span class="s2">"xxx"</span><span class="p">,</span><span class="w"> </span><span class="s2">"yyy"</span><span class="p">,</span><span class="w"> </span><span class="s2">"zzz"</span><span class="p">]</span><span class="w"> </span><span class="p">]</span><span class="w">
</span></code></pre>
</div>
</li>
<li>
<p>All fields are always strings. CSV itself does not support type casting to
integers, floats, booleans, or anything else. It is not a CSV library’s
responsibility to type cast input CSV data.</p>
<p>If type casting is required, it is up to the developer using a specific CSV
library to ensure types are correctly dealt with.</p>
<p>Input JSON:</p>
<div class="language-json highlighter-rouge">
<pre class="highlight"><code><span class="p">[</span><span class="w"> </span><span class="p">[</span><span class="mi">10</span><span class="p">,</span><span class="w"> </span><span class="kc">true</span><span class="p">,</span><span class="w"> </span><span class="mf">0.3</span><span class="p">,</span><span class="w"> </span><span class="kc">null</span><span class="p">,</span><span class="w"> </span><span class="s2">"aaa"</span><span class="p">],</span><span class="w">
</span><span class="p">[</span><span class="mi">11</span><span class="p">,</span><span class="w"> </span><span class="kc">false</span><span class="p">,</span><span class="w"> </span><span class="mf">2.13</span><span class="p">,</span><span class="w"> </span><span class="s2">""</span><span class="p">,</span><span class="w"> </span><span class="s2">"bbb"</span><span class="p">]</span><span class="w"> </span><span class="p">]</span><span class="w">
</span></code></pre>
</div>
<p>Output CSV:</p>
<pre><code class="language-csv">10,true,0.3,,aaa¬
11,false,2.13,,bbb¬
</code></pre>
<p>Output CSV parsed back to JSON:</p>
<div class="language-json highlighter-rouge">
<pre class="highlight"><code><span class="p">[</span><span class="w"> </span><span class="p">[</span><span class="s2">"10"</span><span class="p">,</span><span class="w"> </span><span class="s2">"true"</span><span class="p">,</span><span class="w"> </span><span class="s2">"0.3"</span><span class="p">,</span><span class="w"> </span><span class="s2">""</span><span class="p">,</span><span class="w"> </span><span class="s2">"aaa"</span><span class="p">],</span><span class="w">
</span><span class="p">[</span><span class="s2">"11"</span><span class="p">,</span><span class="w"> </span><span class="s2">"false"</span><span class="p">,</span><span class="w"> </span><span class="s2">"2.13"</span><span class="p">,</span><span class="w"> </span><span class="s2">""</span><span class="p">,</span><span class="w"> </span><span class="s2">"bbb"</span><span class="p">]</span><span class="w"> </span><span class="p">]</span><span class="w">
</span></code></pre>
</div>
<p>At this point it is up to the developer themselves to type cast the above
output data from the CSV parser.</p>
</li>
<li>However, when rendering type cast input data to CSV text, non-string types
MUST be converted to a string in such a way that minimal information is
lost.
<ul>
<li>Integers and floats MUST be rendered as a string version of themselves.</li>
<li>Booleans <code class="highlighter-rouge">true</code> and <code class="highlighter-rouge">false</code> MUST be rendered as <code class="highlighter-rouge">true</code> and <code class="highlighter-rouge">false</code>
strings, not as <code class="highlighter-rouge">1</code> or <code class="highlighter-rouge">0</code> numbers. If numbers are used the resulting
CSV data is indistinguishable from actual integer numbers.</li>
<li><code class="highlighter-rouge">Null</code>/<code class="highlighter-rouge">nil</code> values MUST be rendered as empty strings.</li>
</ul>
</li>
<li>When parsing input CSV data all forms of line breaks (CRLF, LF, and CR) MUST
be supported.</li>
<li>When rendering output CSV data, CRLF MUST be used for line breaks to ensure
maximum cross-platform compatibility.</li>
</ol>
<h2 id="about">About</h2>
<p>This CSV specification is authored by <a href="https://jimeh.me/">Jim Myhrberg</a>.</p>
<p>If you’d like to leave feedback,
please <a href="https://github.com/parsecsv/csv-spec/issues">open an issue on GitHub</a>.</p>
<h2 id="license">License</h2>
<p><a href="http://creativecommons.org/publicdomain/zero/1.0/">CC0 1.0 Universal</a></p>
</div>
</div>
</div>
<script type="text/javascript" src="/assets/main-870855580c69dec57be4c965d0cf8afe78afa6b7b6f6bdb5aff91ac0256c0a1a.js"></script>
</body>
</html>

1
docs/robots.txt Normal file
View File

@@ -0,0 +1 @@
Sitemap: https://csv-spec.org/sitemap.xml

9
docs/sitemap.xml Normal file
View File

@@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd" xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>https://csv-spec.org/spec/0.9.0-draft.1.html</loc>
</url>
<url>
<loc>https://csv-spec.org/</loc>
</url>
</urlset>

View File

@@ -0,0 +1,308 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link href='https://fonts.googleapis.com/css?family=Open+Sans+Condensed:700,300|Open+Sans:400italic,700italic,400,700' rel='stylesheet' type='text/css'>
<link rel="stylesheet" href="https://unpkg.com/purecss@1.0.0/build/pure-min.css" integrity="sha384-nn4HPE8lTHyVtfCBi5yW9d20FjT8BJwUXyWZT9InLYax14RDjBj46LmSztkmNP9w" crossorigin="anonymous">
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/font-awesome/4.7.0/css/font-awesome.min.css">
<link type="text/css" rel="stylesheet" href="/assets/main-5df19fc13b2a391dcab974f0584820f95eecdd416e60b99950f2058c61a2e99f.css">
<!-- Begin Jekyll SEO tag v2.2.3 -->
<title>CSV Spec 0.9.0-draft.1 | CSV Spec</title>
<meta property="og:title" content="CSV Spec 0.9.0-draft.1" />
<meta name="author" content="Jim Myhrberg" />
<meta property="og:locale" content="en_US" />
<meta name="description" content="An attempt to describe CSV-like formats in an obvious and easy to understand way, complete with code examples aimed at developers." />
<meta property="og:description" content="An attempt to describe CSV-like formats in an obvious and easy to understand way, complete with code examples aimed at developers." />
<link rel="canonical" href="https://csv-spec.org/spec/0.9.0-draft.1.html" />
<meta property="og:url" content="https://csv-spec.org/spec/0.9.0-draft.1.html" />
<meta property="og:site_name" content="CSV Spec" />
<script type="application/ld+json">
{"@context":"http://schema.org","@type":"WebPage","headline":"CSV Spec 0.9.0-draft.1","author":{"@type":"Person","name":"Jim Myhrberg"},"description":"An attempt to describe CSV-like formats in an obvious and easy to understand way, complete with code examples aimed at developers.","url":"https://csv-spec.org/spec/0.9.0-draft.1.html"}
</script>
<!-- End Jekyll SEO tag -->
</head>
<body>
<div id="layout">
<a href="#menu" id="menuLink" class="menu-link">
<span></span>
</a>
<div id="menu">
<div class="pure-menu">
<ul class="pure-menu-list">
<li class="pure-menu-item">
<div class="pure-menu-label">Versions:</div>
</li>
<li class="pure-menu-item version-0.9.0-draft.1 pure-menu-selected">
<a href="/spec/0.9.0-draft.1.html" class="pure-menu-link">0.9.0-draft.1</a>
</li>
</ul>
</div>
<div class="links">
<a href="https://github.com/parsecsv/csv-spec">
<i class="fa fa-github" aria-hidden="true"></i>
</a>
</div>
</div>
<div id="main">
<div class="content">
<h1 id="csv-spec-090-draft1">CSV Spec 0.9.0-draft.1</h1>
<h2 id="summary">Summary</h2>
<p>CSV is not a file format, it is a loose set of guidelines of how to structure
tabular data into a plain text string. As such there’s an endless amount of
<code class="highlighter-rouge">*.csv</code> files floating around which are highly incompatible with each other. The
closest thing there is to a specification is <a href="http://tools.ietf.org/html/rfc4180">RFC
4180</a>.</p>
<h2 id="goals">Goals</h2>
<p>This project is an attempt to summarize RFC 4180 and the information in the
<a href="http://en.wikipedia.org/wiki/Comma-separated_values">Comma-separated values
(CSV)</a> Wikipedia article
into an easy to understand format. The spec will also take into account that the
comma (<code class="highlighter-rouge">,</code>) character is not the only character used as a field
delimiter. Semi-colons (<code class="highlighter-rouge">;</code>), tabs (<code class="highlighter-rouge">\t</code>), and more are popular field delimiter
characters. As such the specification will more accurately be describing a
CSV-like structured data format.</p>
<p>We will also provide input/output test files that CSV parser/writer software
libraries can use to validate if they properly adhere to the rules laid out in
this specification. And if possible we will even try to provide code snippets in
various languages that attempt to automatically determine the delimiter
character used in any given input CSV-like formatted file/data.</p>
<h2 id="roadmap">Roadmap</h2>
<ol>
<li>Write up core specification rules. <em>[in-progress]</em></li>
<li>Create input/output test files covering all rules in the specification.</li>
<li>Create website for <a href="http://csv-spec.org/">csv-spec.org</a>.</li>
<li>Create linting tool as an NPM module, allowing easy validation of CSV data
both client-side in a web browser, and server side via a command line tool.</li>
<li>Create automatic delimiter character detection code snippets in various
programming languages which CSV parser developers can freely use to enhance
their libraries.</li>
</ol>
<h2 id="terminology">Terminology</h2>
<ul>
<li><strong>Field</strong> — A singular String value within a record.</li>
<li><strong>Record</strong> (or <strong>Row</strong>) — A collection of fields. This is often referred to as
a “line”, but a single record can span multiple text lines if a field within
it contains one or more line breaks.</li>
<li><strong>Delimiter</strong> — The character used to separate fields within a row. Commonly
this will be a comma (<code class="highlighter-rouge">,</code>), but semi-colons (<code class="highlighter-rouge">;</code>) or tabs (<code class="highlighter-rouge">\t</code>) are two other
popular delimiter characters.</li>
<li><strong>Header</strong> — The first row is often used to contain the column names for all
remaining rows. Header names would be used as key names when CSV data is
converted to JSON for example.</li>
<li><strong>Line Break</strong> — Line breaks in CSV files can be CRLF (<code class="highlighter-rouge">\r\n</code>), LF (<code class="highlighter-rouge">\n</code>), and
even in rare cases CR (<code class="highlighter-rouge">\r</code>).</li>
<li><strong>LF, CR, and CRLF</strong> — Different types of line breaks, typically determined by
the OS. Linux, OSX, and other *NIX operating systems generally use a line feed
(LF or <code class="highlighter-rouge">\n</code>) character. Windows uses a carriage return (CR or <code class="highlighter-rouge">\r</code>) and a line
feed character, effectively “CRLF” (<code class="highlighter-rouge">\r\n</code>).</li>
</ul>
<h2 id="csv-format-specification">CSV Format Specification</h2>
<p>The key words “MUST”, “MUST NOT”, “REQUIRED”, “SHALL”, “SHALL NOT”, “SHOULD”,
“SHOULD NOT”, “RECOMMENDED”, “MAY”, and “OPTIONAL” in this document are to be
interpreted as described in <a href="https://tools.ietf.org/html/rfc2119">RFC 2119</a>.</p>
<p>These rules are mostly based on the corresponding section from <a href="http://tools.ietf.org/html/rfc4180#section-2">RFC
4180</a>, with minor changes,
clarifications and improved examples.</p>
<ol>
<li>
<p>Each record starts at the beginning of its own line, and ends with a line
break (shown as <code class="highlighter-rouge">¬</code>).</p>
<p>CSV:</p>
<pre><code class="language-csv">aaa,bbb,ccc¬
xxx,yyy,zzz¬
</code></pre>
<p>JSON:</p>
<div class="language-json highlighter-rouge">
<pre class="highlight"><code><span class="p">[</span><span class="w"> </span><span class="p">[</span><span class="s2">"aaa"</span><span class="p">,</span><span class="w"> </span><span class="s2">"bbb"</span><span class="p">,</span><span class="w"> </span><span class="s2">"ccc"</span><span class="p">],</span><span class="w">
</span><span class="p">[</span><span class="s2">"xxx"</span><span class="p">,</span><span class="w"> </span><span class="s2">"yyy"</span><span class="p">,</span><span class="w"> </span><span class="s2">"zzz"</span><span class="p">]</span><span class="w"> </span><span class="p">]</span><span class="w">
</span></code></pre>
</div>
</li>
<li>
<p>Though it is RECOMMENDED, the last record in a file is not required to have an
ending line break.</p>
<p>CSV:</p>
<pre><code class="language-csv">aaa,bbb,ccc¬
xxx,yyy,zzz
</code></pre>
<p>JSON:</p>
<div class="language-json highlighter-rouge">
<pre class="highlight"><code><span class="p">[</span><span class="w"> </span><span class="p">[</span><span class="s2">"aaa"</span><span class="p">,</span><span class="w"> </span><span class="s2">"bbb"</span><span class="p">,</span><span class="w"> </span><span class="s2">"ccc"</span><span class="p">],</span><span class="w">
</span><span class="p">[</span><span class="s2">"xxx"</span><span class="p">,</span><span class="w"> </span><span class="s2">"yyy"</span><span class="p">,</span><span class="w"> </span><span class="s2">"zzz"</span><span class="p">]</span><span class="w"> </span><span class="p">]</span><span class="w">
</span></code></pre>
</div>
</li>
<li>
<p>There may be an OPTIONAL header line appearing as the first line of the file
with the same format as normal records. This header will contain names
corresponding to the fields in the file, and MUST contain the same number of
fields as the records in the rest of the file.</p>
<p>CSV:</p>
<pre><code class="language-csv">field_1,field_2,field_3¬
aaa,bbb,ccc¬
xxx,yyy,zzz¬
</code></pre>
<p>JSON (ignoring headers):</p>
<div class="language-json highlighter-rouge">
<pre class="highlight"><code><span class="p">[</span><span class="w"> </span><span class="p">[</span><span class="s2">"field_1"</span><span class="p">,</span><span class="w"> </span><span class="s2">"field_2"</span><span class="p">,</span><span class="w"> </span><span class="s2">"field_3"</span><span class="p">],</span><span class="w">
</span><span class="p">[</span><span class="s2">"aaa"</span><span class="p">,</span><span class="w"> </span><span class="s2">"bbb"</span><span class="p">,</span><span class="w"> </span><span class="s2">"ccc"</span><span class="p">],</span><span class="w">
</span><span class="p">[</span><span class="s2">"xxx"</span><span class="p">,</span><span class="w"> </span><span class="s2">"yyy"</span><span class="p">,</span><span class="w"> </span><span class="s2">"zzz"</span><span class="p">]</span><span class="w"> </span><span class="p">]</span><span class="w">
</span></code></pre>
</div>
<p>JSON (using headers):</p>
<div class="language-json highlighter-rouge">
<pre class="highlight"><code><span class="p">[</span><span class="w"> </span><span class="p">{</span><span class="nt">"field_1"</span><span class="p">:</span><span class="w"> </span><span class="s2">"aaa"</span><span class="p">,</span><span class="w"> </span><span class="nt">"field_2"</span><span class="p">:</span><span class="w"> </span><span class="s2">"bbb"</span><span class="p">,</span><span class="w"> </span><span class="nt">"field_3"</span><span class="p">:</span><span class="w"> </span><span class="s2">"ccc"</span><span class="p">},</span><span class="w">
</span><span class="p">{</span><span class="nt">"field_1"</span><span class="p">:</span><span class="w"> </span><span class="s2">"xxx"</span><span class="p">,</span><span class="w"> </span><span class="nt">"field_2"</span><span class="p">:</span><span class="w"> </span><span class="s2">"yyy"</span><span class="p">,</span><span class="w"> </span><span class="nt">"field_3"</span><span class="p">:</span><span class="w"> </span><span class="s2">"zzz"</span><span class="p">}</span><span class="w"> </span><span class="p">]</span><span class="w">
</span></code></pre>
</div>
</li>
<li>
<p>Within each record and the OPTIONAL header, there may be one or more fields,
separated by a delimiter (normally a comma). Each record MUST contain the
same number of fields throughout the file.</p>
<p>CSV (invalid):</p>
<pre><code class="language-csv">aaa,bbb,ccc¬
111,222,333,444¬
xxx,yyy,zzz¬
</code></pre>
</li>
<li>
<p>The last field in a record MUST NOT be followed by a comma. This results in an
additional field with nothing in it.</p>
<p>CSV:</p>
<pre><code class="language-csv">aaa,bbb,ccc,¬
xxx,yyy,zzz,¬
</code></pre>
<p>JSON:</p>
<div class="language-json highlighter-rouge">
<pre class="highlight"><code><span class="p">[</span><span class="w"> </span><span class="p">[</span><span class="s2">"aaa"</span><span class="p">,</span><span class="w"> </span><span class="s2">"bbb"</span><span class="p">,</span><span class="w"> </span><span class="s2">"ccc"</span><span class="p">,</span><span class="w"> </span><span class="s2">""</span><span class="p">],</span><span class="w">
</span><span class="p">[</span><span class="s2">"xxx"</span><span class="p">,</span><span class="w"> </span><span class="s2">"yyy"</span><span class="p">,</span><span class="w"> </span><span class="s2">"zzz"</span><span class="p">,</span><span class="w"> </span><span class="s2">""</span><span class="p">]</span><span class="w"> </span><span class="p">]</span><span class="w">
</span></code></pre>
</div>
</li>
<li>
<p>Spaces are considered part of a field and MUST NOT be ignored.</p>
<p>CSV:</p>
<pre><code class="language-csv">aaa , bbb , ccc¬
xxx, yyy ,zzz ¬
</code></pre>
<p>JSON:</p>
<div class="language-json highlighter-rouge">
<pre class="highlight"><code><span class="p">[</span><span class="w"> </span><span class="p">[</span><span class="s2">"aaa "</span><span class="p">,</span><span class="w"> </span><span class="s2">" bbb "</span><span class="p">,</span><span class="w"> </span><span class="s2">" ccc"</span><span class="p">],</span><span class="w">
</span><span class="p">[</span><span class="s2">" xxx"</span><span class="p">,</span><span class="w"> </span><span class="s2">" yyy "</span><span class="p">,</span><span class="w"> </span><span class="s2">"zzz "</span><span class="p">]</span><span class="w"> </span><span class="p">]</span><span class="w">
</span></code></pre>
</div>
</li>
<li>
<p>Fields containing line breaks (CRLF, LF, or CR), double quotes, or the
delimiter character (normally a comma) MUST be enclosed in double-quotes.</p>
<p>CSV:</p>
<pre><code class="language-csv">aaa,"b¬
bb",ccc¬
xxx,"y, yy",zzz¬
</code></pre>
<p>JSON:</p>
<div class="language-json highlighter-rouge">
<pre class="highlight"><code><span class="p">[</span><span class="w"> </span><span class="p">[</span><span class="s2">"aaa"</span><span class="p">,</span><span class="w"> </span><span class="s2">"b\r\nbb"</span><span class="p">,</span><span class="w"> </span><span class="s2">"ccc"</span><span class="p">],</span><span class="w">
</span><span class="p">[</span><span class="s2">"xxx"</span><span class="p">,</span><span class="w"> </span><span class="s2">"y, yy"</span><span class="p">,</span><span class="w"> </span><span class="s2">"zzz"</span><span class="p">]</span><span class="w"> </span><span class="p">]</span><span class="w">
</span></code></pre>
</div>
</li>
<li>
<p>A double-quote appearing inside a field MUST be escaped by preceding it with
another double quote, and the field itself MUST be enclosed in double quotes.</p>
<p>CSV:</p>
<pre><code class="language-csv">aaa,"b""bb",ccc¬
</code></pre>
<p>JSON:</p>
<div class="language-json highlighter-rouge">
<pre class="highlight"><code><span class="p">[</span><span class="w"> </span><span class="p">[</span><span class="s2">"aaa"</span><span class="p">,</span><span class="w"> </span><span class="s2">"b\"bb"</span><span class="p">,</span><span class="w"> </span><span class="s2">"ccc"</span><span class="p">]</span><span class="w"> </span><span class="p">]</span><span class="w">
</span></code></pre>
</div>
</li>
<li>
<p>When a field enclosed in double quotes has spaces before and/or after the
double quotes, the spaces MUST be ignored, as the field starts and ends with
the double quotes. However this is considered invalid formatting and the CSV
parser SHOULD report some form of warning message.</p>
<p>CSV:</p>
<pre><code class="language-csv">aaa,bbb,ccc¬
xxx, "y, yy" ,zzz¬
</code></pre>
<p>JSON:</p>
<div class="language-json highlighter-rouge">
<pre class="highlight"><code><span class="p">[</span><span class="w"> </span><span class="p">[</span><span class="s2">"aaa"</span><span class="p">,</span><span class="w"> </span><span class="s2">"bbb"</span><span class="p">,</span><span class="w"> </span><span class="s2">"ccc"</span><span class="p">],</span><span class="w">
</span><span class="p">[</span><span class="s2">"xxx"</span><span class="p">,</span><span class="w"> </span><span class="s2">"y, yy"</span><span class="p">,</span><span class="w"> </span><span class="s2">"zzz"</span><span class="p">]</span><span class="w"> </span><span class="p">]</span><span class="w">
</span></code></pre>
</div>
</li>
<li>
<p>It is possible to enclose every field in double quotes even if they don’t
need to be enclosed. However it is RECOMMENDED to only enclose fields in
double quotes that require it.</p>
<p>CSV:</p>
<pre><code class="language-csv">"aaa","bbb","ccc"¬
"xxx",yyy,zzz¬
</code></pre>
<p>JSON:</p>
<div class="language-json highlighter-rouge">
<pre class="highlight"><code><span class="p">[</span><span class="w"> </span><span class="p">[</span><span class="s2">"aaa"</span><span class="p">,</span><span class="w"> </span><span class="s2">"bbb"</span><span class="p">,</span><span class="w"> </span><span class="s2">"ccc"</span><span class="p">],</span><span class="w">
</span><span class="p">[</span><span class="s2">"xxx"</span><span class="p">,</span><span class="w"> </span><span class="s2">"yyy"</span><span class="p">,</span><span class="w"> </span><span class="s2">"zzz"</span><span class="p">]</span><span class="w"> </span><span class="p">]</span><span class="w">
</span></code></pre>
</div>
</li>
<li>
<p>All fields are always strings. CSV itself does not support type casting to
integers, floats, booleans, or anything else. It is not a CSV library’s
responsibility to type cast input CSV data.</p>
<p>If type casting is required, it is up to the developer using a specific CSV
library to ensure types are correctly dealt with.</p>
<p>Input JSON:</p>
<div class="language-json highlighter-rouge">
<pre class="highlight"><code><span class="p">[</span><span class="w"> </span><span class="p">[</span><span class="mi">10</span><span class="p">,</span><span class="w"> </span><span class="kc">true</span><span class="p">,</span><span class="w"> </span><span class="mf">0.3</span><span class="p">,</span><span class="w"> </span><span class="kc">null</span><span class="p">,</span><span class="w"> </span><span class="s2">"aaa"</span><span class="p">],</span><span class="w">
</span><span class="p">[</span><span class="mi">11</span><span class="p">,</span><span class="w"> </span><span class="kc">false</span><span class="p">,</span><span class="w"> </span><span class="mf">2.13</span><span class="p">,</span><span class="w"> </span><span class="s2">""</span><span class="p">,</span><span class="w"> </span><span class="s2">"bbb"</span><span class="p">]</span><span class="w"> </span><span class="p">]</span><span class="w">
</span></code></pre>
</div>
<p>Output CSV:</p>
<pre><code class="language-csv">10,true,0.3,,aaa¬
11,false,2.13,,bbb¬
</code></pre>
<p>Output CSV parsed back to JSON:</p>
<div class="language-json highlighter-rouge">
<pre class="highlight"><code><span class="p">[</span><span class="w"> </span><span class="p">[</span><span class="s2">"10"</span><span class="p">,</span><span class="w"> </span><span class="s2">"true"</span><span class="p">,</span><span class="w"> </span><span class="s2">"0.3"</span><span class="p">,</span><span class="w"> </span><span class="s2">""</span><span class="p">,</span><span class="w"> </span><span class="s2">"aaa"</span><span class="p">],</span><span class="w">
</span><span class="p">[</span><span class="s2">"11"</span><span class="p">,</span><span class="w"> </span><span class="s2">"false"</span><span class="p">,</span><span class="w"> </span><span class="s2">"2.13"</span><span class="p">,</span><span class="w"> </span><span class="s2">""</span><span class="p">,</span><span class="w"> </span><span class="s2">"bbb"</span><span class="p">]</span><span class="w"> </span><span class="p">]</span><span class="w">
</span></code></pre>
</div>
<p>At this point it is up to the developer themselves to type cast the above
output data from the CSV parser.</p>
</li>
<li>However, when rendering type cast input data to CSV text, non-string types
MUST be converted to a string in such a way that minimal information is
lost.
<ul>
<li>Integers and floats MUST be rendered as a string version of themselves.</li>
<li>Booleans <code class="highlighter-rouge">true</code> and <code class="highlighter-rouge">false</code> MUST be rendered as <code class="highlighter-rouge">true</code> and <code class="highlighter-rouge">false</code>
strings, not as <code class="highlighter-rouge">1</code> or <code class="highlighter-rouge">0</code> numbers. If numbers are used the resulting
CSV data is indistinguishable from actual integer numbers.</li>
<li><code class="highlighter-rouge">Null</code>/<code class="highlighter-rouge">nil</code> values MUST be rendered as empty strings.</li>
</ul>
</li>
<li>When parsing input CSV data all forms of line breaks (CRLF, LF, and CR) MUST
be supported.</li>
<li>When rendering output CSV data, CRLF MUST be used for line breaks to ensure
maximum cross-platform compatibility.</li>
</ol>
<h2 id="about">About</h2>
<p>This CSV specification is authored by <a href="https://jimeh.me/">Jim Myhrberg</a>.</p>
<p>If you’d like to leave feedback,
please <a href="https://github.com/parsecsv/csv-spec/issues">open an issue on GitHub</a>.</p>
<h2 id="license">License</h2>
<p><a href="http://creativecommons.org/publicdomain/zero/1.0/">CC0 1.0 Universal</a></p>
</div>
</div>
</div>
<script type="text/javascript" src="/assets/main-870855580c69dec57be4c965d0cf8afe78afa6b7b6f6bdb5aff91ac0256c0a1a.js"></script>
</body>
</html>

312
index.md Normal file
View File

@@ -0,0 +1,312 @@
---
title: CSV Spec 0.9.0-draft.1
version: 0.9.0-draft.1
---
CSV Spec 0.9.0-draft.1
====================
Summary
-------
CSV is not a file format, it is a loose set of guidelines of how to structure
tabular data into a plain text string. As such there's an endless amount of
`*.csv` files floating around which are highly incompatible with each other. The
closest thing there is to a specification is [RFC
4180](http://tools.ietf.org/html/rfc4180).
Goals
-----
This project is an attempt to summarize RFC 4180 and the information in the
[Comma-separated values
(CSV)](http://en.wikipedia.org/wiki/Comma-separated_values) Wikipedia article
into an easy-to-understand format. The spec will also take into account that the
comma (`,`) character is not the only character used as a field
delimiter. Semi-colons (`;`), tabs (`\t`), and more are popular field delimiter
characters. As such the specification will more accurately be describing a
CSV-like structured data format.
We will also provide input/output test files that CSV parser/writer software
libraries can use to validate if they properly adhere to the rules laid out in
this specification. And if possible we will even try to provide code snippets in
various languages that attempt to automatically determine the delimiter
character used in any given input CSV-like formatted file/data.
Roadmap
-------
1. Write up core specification rules. _[in-progress]_
2. Create input/output test files covering all rules in the specification.
3. Create website for [csv-spec.org](http://csv-spec.org/).
4. Create linting tool as an NPM module, allowing easy validation of CSV data
both client-side in a web browser, and server side via a command line tool.
5. Create automatic delimiter character detection code snippets in various
programming languages which CSV parser developers can freely use to enhance
their libraries.
Terminology
-----------
- **Field** — A singular String value within a record.
- **Record** (or **Row**) — A collection of fields. This is often referred to as
a "line", but a single record can span multiple text lines if a field within
it contains one or more line breaks.
- **Delimiter** — The character used to separate fields within a row. Commonly
this will be a comma (`,`), but semi-colons (`;`) or tabs (`\t`) are two other
popular delimiter characters.
- **Header** — The first row is often used to contain the column names for all
remaining rows. Header names would be used as key names when CSV data is
converted to JSON for example.
- **Line Break** — Line breaks in CSV files can be CRLF (`\r\n`), LF (`\n`), and
even in rare cases CR (`\r`).
- **LF, CR, and CRLF** — Different types of line breaks, typically determined by
the OS. Linux, OSX, and other *NIX operating systems generally use a line feed
(LF or `\n`) character. Windows uses a carriage return (CR or `\r`) and a line
feed character, effectively "CRLF" (`\r\n`).
CSV Format Specification
------------------------
The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
"SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be
interpreted as described in [RFC 2119](https://tools.ietf.org/html/rfc2119).
These rules are mostly based on the corresponding section from [RFC
4180](http://tools.ietf.org/html/rfc4180#section-2), with minor changes,
clarifications and improved examples.
1. Each record starts at the beginning of its own line, and ends with a line
break (shown as `¬`).
CSV:
```csv
aaa,bbb,ccc¬
xxx,yyy,zzz¬
```
JSON:
```json
[ ["aaa", "bbb", "ccc"],
["xxx", "yyy", "zzz"] ]
```
2. Though it is RECOMMENDED, the last record in a file is not required to have an
ending line break.
CSV:
```csv
aaa,bbb,ccc¬
xxx,yyy,zzz
```
JSON:
```json
[ ["aaa", "bbb", "ccc"],
["xxx", "yyy", "zzz"] ]
```
3. There may be an OPTIONAL header line appearing as the first line of the file
with the same format as normal records. This header will contain names
corresponding to the fields in the file, and MUST contain the same number of
fields as the records in the rest of the file.
CSV:
```csv
field_1,field_2,field_3¬
aaa,bbb,ccc¬
xxx,yyy,zzz¬
```
JSON (ignoring headers):
```json
[ ["field_1", "field_2", "field_3"],
["aaa", "bbb", "ccc"],
["xxx", "yyy", "zzz"] ]
```
JSON (using headers):
```json
[ {"field_1": "aaa", "field_2": "bbb", "field_3": "ccc"},
{"field_1": "xxx", "field_2": "yyy", "field_3": "zzz"} ]
```
4. Within each record and the OPTIONAL header, there may be one or more fields,
separated by a delimiter (normally a comma). Each record MUST contain the
same number of fields throughout the file.
CSV (invalid):
```csv
aaa,bbb,ccc¬
111,222,333,444¬
xxx,yyy,zzz¬
```
5. The last field in a record MUST NOT be followed by a comma. This results in an
additional field with nothing in it.
CSV:
```csv
aaa,bbb,ccc,¬
xxx,yyy,zzz,¬
```
JSON:
```json
[ ["aaa", "bbb", "ccc", ""],
["xxx", "yyy", "zzz", ""] ]
```
6. Spaces are considered part of a field and MUST NOT be ignored.
CSV:
```csv
aaa , bbb , ccc¬
xxx, yyy ,zzz ¬
```
JSON:
```json
[ ["aaa ", " bbb ", " ccc"],
[" xxx", " yyy ", "zzz "] ]
```
7. Fields containing line breaks (CRLF, LF, or CR), double quotes, or the
delimiter character (normally a comma) MUST be enclosed in double-quotes.
CSV:
```csv
aaa,"b¬
bb",ccc¬
xxx,"y, yy",zzz¬
```
JSON:
```json
[ ["aaa", "b\r\nbb", "ccc"],
["xxx", "y, yy", "zzz"] ]
```
8. A double-quote appearing inside a field MUST be escaped by preceding it with
another double quote, and the field itself MUST be enclosed in double quotes.
CSV:
```csv
aaa,"b""bb",ccc¬
```
JSON:
```json
[ ["aaa", "b\"bb", "ccc"] ]
```
9. When a field enclosed in double quotes has spaces before and/or after the
double quotes, the spaces MUST be ignored, as the field starts and ends with
the double quotes. However this is considered invalid formatting and the CSV
parser SHOULD report some form of warning message.
CSV:
```csv
aaa,bbb,ccc¬
xxx, "y, yy" ,zzz¬
```
JSON:
```json
[ ["aaa", "bbb", "ccc"],
["xxx", "y, yy", "zzz"] ]
```
10. It is possible to enclose every field in double quotes even if they don't
need to be enclosed. However it is RECOMMENDED to only enclose fields in
double quotes that require it.
CSV:
```csv
"aaa","bbb","ccc"¬
"xxx",yyy,zzz¬
```
JSON:
```json
[ ["aaa", "bbb", "ccc"],
["xxx", "yyy", "zzz"] ]
```
11. All fields are always strings. CSV itself does not support type casting to
integers, floats, booleans, or anything else. It is not a CSV library's
responsibility to type cast input CSV data.
If type casting is required, it is up to the developer using a specific CSV
library to ensure types are correctly dealt with.
Input JSON:
```json
[ [10, true, 0.3, null, "aaa"],
[11, false, 2.13, "", "bbb"] ]
```
Output CSV:
```csv
10,true,0.3,,aaa¬
11,false,2.13,,bbb¬
```
Output CSV parsed back to JSON:
```json
[ ["10", "true", "0.3", "", "aaa"],
["11", "false", "2.13", "", "bbb"] ]
```
At this point it is up to the developer themselves to type cast the above
output data from the CSV parser.
12. However, when rendering type cast input data to CSV text, non-string types
MUST be converted to a string in such a way that minimal information is
lost.
- Integers and floats MUST be rendered as a string version of themselves.
- Booleans `true` and `false` MUST be rendered as `true` and `false`
strings, not as `1` or `0` numbers. If numbers are used the resulting
CSV data is indistinguishable from actual integer numbers.
- `Null`/`nil` values MUST be rendered as empty strings.
13. When parsing input CSV data all forms of line breaks (CRLF, LF, and CR) MUST
be supported.
14. When rendering output CSV data, CRLF MUST be used for line breaks to ensure
maximum cross-platform compatibility.
About
-----
This CSV specification is authored by [Jim Myhrberg](https://jimeh.me/).
If you'd like to leave feedback,
please [open an issue on GitHub](https://github.com/parsecsv/csv-spec/issues).
License
-------
[CC0 1.0 Universal](http://creativecommons.org/publicdomain/zero/1.0/)

312
spec/0.9.0-draft.1.md Normal file
View File

@@ -0,0 +1,312 @@
---
title: CSV Spec 0.9.0-draft.1
version: 0.9.0-draft.1
---
CSV Spec 0.9.0-draft.1
====================
Summary
-------
CSV is not a file format, it is a loose set of guidelines of how to structure
tabular data into a plain text string. As such there's an endless amount of
`*.csv` files floating around which are highly incompatible with each other. The
closest thing there is to a specification is [RFC
4180](http://tools.ietf.org/html/rfc4180).
Goals
-----
This project is an attempt to summarize RFC 4180 and the information in the
[Comma-separated values
(CSV)](http://en.wikipedia.org/wiki/Comma-separated_values) Wikipedia article
into an easy-to-understand format. The spec will also take into account that the
comma (`,`) character is not the only character used as a field
delimiter. Semi-colons (`;`), tabs (`\t`), and more are popular field delimiter
characters. As such the specification will more accurately be describing a
CSV-like structured data format.
We will also provide input/output test files that CSV parser/writer software
libraries can use to validate if they properly adhere to the rules laid out in
this specification. And if possible we will even try to provide code snippets in
various languages that attempt to automatically determine the delimiter
character used in any given input CSV-like formatted file/data.
Roadmap
-------
1. Write up core specification rules. _[in-progress]_
2. Create input/output test files covering all rules in the specification.
3. Create website for [csv-spec.org](http://csv-spec.org/).
4. Create linting tool as an NPM module, allowing easy validation of CSV data
both client-side in a web browser, and server side via a command line tool.
5. Create automatic delimiter character detection code snippets in various
programming languages which CSV parser developers can freely use to enhance
their libraries.
Terminology
-----------
- **Field** — A singular String value within a record.
- **Record** (or **Row**) — A collection of fields. This is often referred to as
a "line", but a single record can span multiple text lines if a field within
it contains one or more line breaks.
- **Delimiter** — The character used to separate fields within a row. Commonly
this will be a comma (`,`), but semi-colons (`;`) or tabs (`\t`) are two other
popular delimiter characters.
- **Header** — The first row is often used to contain the column names for all
remaining rows. Header names would be used as key names when CSV data is
converted to JSON for example.
- **Line Break** — Line breaks in CSV files can be CRLF (`\r\n`), LF (`\n`), and
even in rare cases CR (`\r`).
- **LF, CR, and CRLF** — Different types of line breaks, typically determined by
the OS. Linux, OSX, and other *NIX operating systems generally use a line feed
(LF or `\n`) character. Windows uses a carriage return (CR or `\r`) and a line
feed character, effectively "CRLF" (`\r\n`).
CSV Format Specification
------------------------
The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
"SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be
interpreted as described in [RFC 2119](https://tools.ietf.org/html/rfc2119).
These rules are mostly based on the corresponding section from [RFC
4180](http://tools.ietf.org/html/rfc4180#section-2), with minor changes,
clarifications and improved examples.
1. Each record starts at the beginning of its own line, and ends with a line
break (shown as `¬`).
CSV:
```csv
aaa,bbb,ccc¬
xxx,yyy,zzz¬
```
JSON:
```json
[ ["aaa", "bbb", "ccc"],
["xxx", "yyy", "zzz"] ]
```
2. Though it is RECOMMENDED, the last record in a file is not required to have an
ending line break.
CSV:
```csv
aaa,bbb,ccc¬
xxx,yyy,zzz
```
JSON:
```json
[ ["aaa", "bbb", "ccc"],
["xxx", "yyy", "zzz"] ]
```
3. There may be an OPTIONAL header line appearing as the first line of the file
with the same format as normal records. This header will contain names
corresponding to the fields in the file, and MUST contain the same number of
fields as the records in the rest of the file.
CSV:
```csv
field_1,field_2,field_3¬
aaa,bbb,ccc¬
xxx,yyy,zzz¬
```
JSON (ignoring headers):
```json
[ ["field_1", "field_2", "field_3"],
["aaa", "bbb", "ccc"],
["xxx", "yyy", "zzz"] ]
```
JSON (using headers):
```json
[ {"field_1": "aaa", "field_2": "bbb", "field_3": "ccc"},
{"field_1": "xxx", "field_2": "yyy", "field_3": "zzz"} ]
```
4. Within each record and the OPTIONAL header, there may be one or more fields,
separated by a delimiter (normally a comma). Each record MUST contain the
same number of fields throughout the file.
CSV (invalid):
```csv
aaa,bbb,ccc¬
111,222,333,444¬
xxx,yyy,zzz¬
```
5. The last field in a record MUST NOT be followed by a comma. This results in an
additional field with nothing in it.
CSV:
```csv
aaa,bbb,ccc,¬
xxx,yyy,zzz,¬
```
JSON:
```json
[ ["aaa", "bbb", "ccc", ""],
["xxx", "yyy", "zzz", ""] ]
```
6. Spaces are considered part of a field and MUST NOT be ignored.
CSV:
```csv
aaa , bbb , ccc¬
xxx, yyy ,zzz ¬
```
JSON:
```json
[ ["aaa ", " bbb ", " ccc"],
[" xxx", " yyy ", "zzz "] ]
```
7. Fields containing line breaks (CRLF, LF, or CR), double quotes, or the
delimiter character (normally a comma) MUST be enclosed in double-quotes.
CSV:
```csv
aaa,"b¬
bb",ccc¬
xxx,"y, yy",zzz¬
```
JSON:
```json
[ ["aaa", "b\r\nbb", "ccc"],
["xxx", "y, yy", "zzz"] ]
```
8. A double-quote appearing inside a field MUST be escaped by preceding it with
another double quote, and the field itself MUST be enclosed in double quotes.
CSV:
```csv
aaa,"b""bb",ccc¬
```
JSON:
```json
[ ["aaa", "b\"bb", "ccc"] ]
```
9. When a field enclosed in double quotes has spaces before and/or after the
double quotes, the spaces MUST be ignored, as the field starts and ends with
the double quotes. However this is considered invalid formatting and the CSV
parser SHOULD report some form of warning message.
CSV:
```csv
aaa,bbb,ccc¬
xxx, "y, yy" ,zzz¬
```
JSON:
```json
[ ["aaa", "bbb", "ccc"],
["xxx", "y, yy", "zzz"] ]
```
10. It is possible to enclose every field in double quotes even if they don't
need to be enclosed. However it is RECOMMENDED to only enclose fields in
double quotes that require it.
CSV:
```csv
"aaa","bbb","ccc"¬
"xxx",yyy,zzz¬
```
JSON:
```json
[ ["aaa", "bbb", "ccc"],
["xxx", "yyy", "zzz"] ]
```
11. All fields are always strings. CSV itself does not support type casting to
integers, floats, booleans, or anything else. It is not a CSV library's
responsibility to type cast input CSV data.
If type casting is required, it is up to the developer using a specific CSV
library to ensure types are correctly dealt with.
Input JSON:
```json
[ [10, true, 0.3, null, "aaa"],
[11, false, 2.13, "", "bbb"] ]
```
Output CSV:
```csv
10,true,0.3,,aaa¬
11,false,2.13,,bbb¬
```
Output CSV parsed back to JSON:
```json
[ ["10", "true", "0.3", "", "aaa"],
["11", "false", "2.13", "", "bbb"] ]
```
At this point it is up to the developer themselves to type cast the above
output data from the CSV parser.
12. However, when rendering type cast input data to CSV text, non-string types
MUST be converted to a string in such a way that minimal information is
lost.
- Integers and floats MUST be rendered as a string version of themselves.
- Booleans `true` and `false` MUST be rendered as `true` and `false`
strings, not as `1` or `0` numbers. If numbers are used the resulting
CSV data is indistinguishable from actual integer numbers.
- `Null`/`nil` values MUST be rendered as empty strings.
13. When parsing input CSV data all forms of line breaks (CRLF, LF, and CR) MUST
be supported.
14. When rendering output CSV data, CRLF MUST be used for line breaks to ensure
maximum cross-platform compatibility.
About
-----
This CSV specification is authored by [Jim Myhrberg](https://jimeh.me/).
If you'd like to leave feedback,
please [open an issue on GitHub](https://github.com/parsecsv/csv-spec/issues).
License
-------
[CC0 1.0 Universal](http://creativecommons.org/publicdomain/zero/1.0/)