You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
shooflenet/articles/text_editors_with_contented...

207 lines
6.9 KiB

<article>
<p>I've been playing with an exciting feature of HTML5 that I hadn't heard of: the <code class="language-html">contentEditable</code> attribute. It's magical! It just makes anything suddenly be editable in-browser! It also finally provides me with the thing I've been wanting ever since growing dissatisfied with text editors: A way to build a <em>new</em> text editor, <em>without</em> having to actually code up the guts of text movement, manipulation, and entry. Oh, happy day!</p>
<p>More on this as the situation progresses.</p>
<pre><code class="language-grammar">
/*
* This is my parser.
* It's not much, but it's mine.
*/
blocks = code:(if_block / lines)* { return code; }
if_block =
"if" ws "(" cond:string ")" ws "{" result:lines "}"
{ return {type: "if", condition: cond, body: result }; }
/*
while_block = "while" ws "(" cond:string ")" ws "{" result:lines "}"
{ return {type: "while", condition: cond, body: result }; }
*/
lines = lines:line+ ws?
{ return lines; }
line = ws? contents:string ";"
{ return contents; }
ws = (" " / "\n")+ { return ""; }
string = characters:([A-Za-z] [0-9A-Za-z "']*)
{ return characters[0] + characters[1].join(""); }
</code></pre>
<p>Then I started realizing I had no idea what I was doing. So I started defining the grammar for a Lisp! Sort of. More or less.</p>
<pre><code class="language-grammar">
expression =
parenthetical
/ bracketed
/ identifier
/ string
/ number
parenthetical =
"(" cons: expression? cdr:(" "+ expression)* ")"
{
var output = [];
for (var i=0; i&lt;cdr.length; i++) {
output.push(cdr[i][1]);
}
return {type: "parenthetical", first: cons, rest: output};
}
/* A bracketed list: "[" first? (spaces expression)* "]", returned as a plain array. */
bracketed =
"[" cons: expression? cdr: (" "+ expression)* "]"
{
// The array always starts with the optional first element, so for "[]"
// it still holds one entry: whatever the optional match yields when absent.
var output = [cons];
// Each cdr entry is a (spaces, expression) pair; keep only the expression.
for (var i=0; i&lt;cdr.length; i++) {
output.push(cdr[i][1]);
}
return output;
}
identifier = contents:[A-Za-z]+ { return contents.join(""); }
string =
'"' contents:[^"]* '"' { return contents.join(""); }
/ "'" contents:[^']* "'" { return contents.join(""); }
number = contents:[0-9]+ { return parseInt(contents.join(""), 10); }
</code></pre>
<p>Perhaps the answer is to break up the editor's coding space by lines, as webkit is wont to do. Specifically, this would be good because it'd make it easier to track the cursor position, because it would never change relative to the line element it's contained in!</p>
<style type="text/css">
#editor div {
margin-left: 1em;
}
.block>code.line {
display: block;
margin-left: 1em;
}
.if>.branch {
display: inline-block;
}
</style>
<div id="editor">
<div class="block">
<code class="line">// a comment!</code>
<code class="line">var a = 5;</code>
<code class="line">var b = foo(a); </code>
<div class="if block">
<div class="if branch">
<div class="condition">
if (<code class="line">b &gt; 100 &amp;&amp; b / 7 == 3</code>)
</div>
<div class="block">
{
<code class="line">// Some results should take place</code>
<code class="line">b += 294800;</code>
}
</div>
</div>
<div class="elseif branch">
<div class="condition">
elseif (<code class="line">b &lt; 20</code>)
</div>
<div class="block">
{
<code class="line">b -= 1;</code>
}
</div>
</div>
<div class="else branch">
<div class="condition">
else
</div>
<div class="block">
{
<code class="line">raise UgnaughtException();</code>
}
</div>
</div>
</div>
<code class="line">// So yeah</code>
</div>
</div>
<p>I just keep writing grammars! Thanks, <a href="http://pegjs.majda.cz/online">pegjs!</a></p>
<pre><code class="language-grammar">
block = "{" contents:lines "}" { return contents; }
lines =
contents:(whitespace (branch / line) whitespace)*
{ var output = [];
for (var i=0; i&lt;contents.length; i++) { output.push(contents[i][1]); }
return output; }
branch =
main:if_branch elseifs:elseif_branch* elses:else_branch?
{ var output = [main];
if (elseifs.length &gt; 0) { output = output.concat(elseifs); }
if (elses) { output.push(elses); }
return output; }
if_branch = whitespace cond:if_line whitespace body:block
{ return {type: "if", condition: cond, consequent: body}; }
if_line = "if" whitespace "(" condition:line ")" { return condition; }
elseif_branch = whitespace cond:elseif_line whitespace body:block
{ return {type: "else if", condition: cond, consequent: body}; }
elseif_line = "else if" whitespace "(" condition:line ")" { return condition; }
else_branch = whitespace cond:else_line whitespace body:block
{ return {type: "else", condition: cond, consequent: body}; }
else_line = "else" { return "else"; }
line = start:word rest:(" "* word)*
{ var output = start;
for (var i=0; i&lt;rest.length; i++) { output += rest[i][0].join("") + rest[i][1]; }
return output; }
word = c:[A-Za-z]+ { return c.join(""); }
whitespace = (" " / "\n")*
</code></pre>
<p>Okay, and here's another lisp one, because they're so easy and satisfying:</p>
<pre><code class="language-grammar">
expression = whitespace e:(parenthetical / string_literal / number / identifier) whitespace
{ return e; }
parenthetical = "(" sequence:expression* ")" { return sequence; }
string_literal =
"'" characters:[^']* "'" { return characters.join(""); }
/ '"' characters:[^"]* '"' { return characters.join(""); }
number = digits:[0-9]+ { return parseInt(digits.join(""), 10); }
identifier = letters:[A-Za-z]+ { return letters.join(""); }
whitespace = [ \n\t]*
</code></pre>
<p>So, it nearly killed me, but I managed to figure out how to make a parser (using pegjs) that <em>actually parses indentation-based grammars!</em> The basic way it works is that as it matches a line, it reads in the number of spaces before it. Using that, it figures out what indentation level it's at - then, as it's returning the line's payload, it puts the payload into the right code block. It's hacky and awful but I think I understand how it works. I'd rather move the logic into the <code>block</code> rule than the <code>line</code> rule, but that's pretty simple in theory. Here you go:</p>
<pre><code class="language-grammar">
/* Shared state, set up once in the grammar initializer. */
{ var code = []; // outermost block; the start rule returns it
var indentations = [0]; // stack of indentation widths that are currently open
var current_head = [code]; } /* stack of open blocks; new lines go into the last one */
/* Start rule: match every line, then return the nested arrays the line rule built up. */
b = line* { return code; }
/* One line: leading spaces, a payload ("ab", one or more "b", then "a"), and a newline. */
line =
(spaces:" "* &amp;
{ var level = spaces.length; // indentation width of this line
// NOTE(review): this predicate changes the two stacks as a side effect, and
// nothing here undoes that if the rest of the line later fails to match.
if (level &gt; indentations[indentations.length-1]) {
// Deeper than the innermost open level: open a new block nested in the current one.
indentations.push(level);
var new_block = [];
current_head[current_head.length-1].push(new_block);
current_head.push(new_block)
} else if (indentations.indexOf(level) == -1) {
// Not deeper, and not a width that is currently open: reject the line.
return false;
} else {
// A width that is already open: close blocks until it is the innermost one again.
while (indentations[indentations.length-1] != level) {
indentations.pop()
current_head.pop()
}
}
return true; }
{ return spaces.length; })
payload:("ab" bs:"b"+ "a" { return "ab" + bs.join("") + "a"; })
"\n"
{ current_head[current_head.length-1].push(payload); // file the line under the innermost open block
return payload; }
</code></pre>
</article>