// md.oak — Markdown parsing and rendering library (Oak standard library)
// See the Oak standard library documentation on GitHub.
{
slice: slice
map: map
each: each
take: take
filter: filter
reduce: reduce
every: every
append: append
} := import('std')
{
digit?: digit?
letter?: letter?
space?: space?
word?: word?
lower: lower
indexOf: indexOf
startsWith?: startsWith?
endsWith?: endsWith?
trimStart: trimStart
replace: replace
join: join
split: split
} := import('str')
{
format: format
} := import('fmt')
{
percentDecode: percentDecode
} := import('http')
// Reader wraps a string or list `s` in a stateful cursor with one-item
// lookahead and several bulk-read helpers; it is the scanning primitive
// used by both the inline tokenizer and the line-level parsers.
fn Reader(s) {
	// index of the next unread item
	i := 0
	// peek reports the next unread item without consuming it
	fn peek s.(i)
	// last reports the most recently consumed item
	// fixed: was s.(i + 1), which looked ahead of the cursor rather than
	// behind it
	fn last s.(i - 1)
	// back rewinds one item, stopping at the start of input
	fn back if i {
		0 -> 0
		_ -> i <- i - 1
	}
	// next consumes and returns one item, or ? at end of input
	fn next if i {
		len(s) -> ?
		_ -> {
			c := s.(i)
			i <- i + 1
			c
		}
	}
	// expect? consumes `prefix` and reports true iff the unread input
	// starts with it; otherwise consumes nothing and reports false
	fn expect?(prefix) if s |> slice(i) |> startsWith?(prefix) {
		true -> {
			i <- i + len(prefix)
			true
		}
		_ -> false
	}
	// itemIndex reports the first index of `it` in `list`, or -1
	fn itemIndex(list, it) {
		fn sub(i) if i < len(list) {
			true -> if list.(i) {
				it -> i
				_ -> sub(i + 1)
			}
			_ -> -1
		}
		sub(0)
	}
	// readUntil consumes up to (but not including) the next occurrence of
	// item `c`, or answers ? when `c` never occurs again
	fn readUntil(c) if index := s |> slice(i) |> itemIndex(c) {
		-1 -> ?
		_ -> {
			substr := s |> slice(i, i + index)
			i <- i + index
			substr
		}
	}
	// readUntilPrefix consumes up to (but not including) the next position
	// where `prefix` begins, or answers ? when it is never found
	fn readUntilPrefix(prefix) {
		fn sub(index) if index + len(prefix) <= len(s) -> {
			if part := s |> slice(index, index + len(prefix)) {
				prefix -> {
					substr := s |> slice(i, index)
					i <- index
					substr
				}
				// fixed: was sub(i + 1), which rescanned from the
				// (unchanged) cursor position and could never terminate
				_ -> sub(index + 1)
			}
		}
		sub(i)
	}
	// readUntilEnd consumes and returns everything left
	fn readUntilEnd {
		substr := s |> slice(i)
		i <- len(s)
		substr
	}
	// readUntilMatchingDelim consumes up to the delimiter balancing an
	// already-consumed `left` ('[' or '('), leaving the cursor on the
	// matching delimiter, or answers ? when the input is unbalanced
	fn readUntilMatchingDelim(left) {
		right := if left {
			'[' -> ']'
			'(' -> ')'
		}
		fn sub(index, stack) if stack {
			0 -> index - 1
			_ -> if c := s.(index) {
				? -> -1
				left -> sub(index + 1, stack + 1)
				right -> sub(index + 1, stack - 1)
				_ -> sub(index + 1, stack)
			}
		}
		if matchingDelimIdx := sub(i, 1) {
			-1 -> ?
			_ -> {
				substr := s |> slice(i, matchingDelimIdx)
				i <- matchingDelimIdx
				substr
			}
		}
	}
	{
		peek: peek
		last: last
		back: back
		next: next
		expect?: expect?
		readUntil: readUntil
		readUntilPrefix: readUntilPrefix
		readUntilEnd: readUntilEnd
		readUntilMatchingDelim: readUntilMatchingDelim
	}
}
// uword? reports whether `c` counts as a word character for tokenizing:
// either an ASCII word character, or any codepoint beyond 7-bit ASCII.
fn uword?(c) if {
	word?(c) -> true
	_ -> codepoint(c) > 127
}
// tokenizeText splits a single line of Markdown source into a flat token
// list: formatting delimiters ('_', '**', '`', '[', ...) become standalone
// tokens, and runs of ordinary characters accumulate into string tokens.
fn tokenizeText(line) {
	reader := Reader(line)
	peek := reader.peek
	next := reader.next
	// the list always ends with an "open" text token that append extends
	tokens := ['']
	// push emits a completed delimiter token, then opens a fresh text token
	fn push(tok) tokens << tok << ''
	// append extends the current (last) text token in place
	fn append(suffix) tokens.(len(tokens) - 1) << suffix
	fn sub if c := next() {
		? -> ?
		// '_' and '*' double up into '__' / '**' when repeated
		'_', '*' -> {
			if peek() {
				c -> {
					next()
					push(c + c)
				}
				_ -> push(c)
			}
			sub()
		}
		// backslash escapes the following character into plain text
		'\\' -> if d := next() {
			? -> ?
			_ -> sub(append(d))
		}
		// single-character delimiters become their own tokens
		'`'
		'~'
		'!', '[', ']', '(', ')' -> sub(push(c))
		_ -> sub(append(c))
	}
	sub()
	// drop the empty filler tokens left around delimiters
	tokens |> with filter() fn(tok) tok != ''
}
// unifyTextNodes merges adjacent plain-string nodes in `nodes` into single
// strings separated by `joiner`, recursing into the children of element
// nodes. Used to coalesce text after inline parsing ('' joiner) and to
// join multi-line blocks ('\n' or ' ' joiner).
fn unifyTextNodes(nodes, joiner) nodes |> with reduce([]) fn(acc, child) if type(child) {
	:string -> if type(acc.(len(acc) - 1)) {
		// previous node is also a string: extend it in place
		:string -> {
			acc.(len(acc) - 1) << joiner << child
			acc
		}
		_ -> acc << child
	}
	// element node: recurse into children when present
	// NOTE(review): appends the value of `child.children := ...`, which is
	// presumably the updated child object — confirm against Oak's
	// property-assignment semantics
	_ -> acc << if child.children {
		? -> child
		_ -> child.children := unifyTextNodes(child.children, joiner)
	}
}
// parseText converts a flat token list (from tokenizeText) into a list of
// inline AST nodes, pairing formatting delimiters and recognizing links,
// images, and checkboxes. Unmatched delimiters fall back to literal text.
fn parseText(tokens) {
	reader := Reader(tokens)
	peek := reader.peek
	next := reader.next
	readUntil := reader.readUntil
	readUntilMatchingDelim := reader.readUntilMatchingDelim
	// handleDelimitedRange parses `tok ... tok` into an element tagged
	// `tag`; when no closing `tok` remains, the opener stays literal text
	fn handleDelimitedRange(tok, tag, nodes, sub) if range := readUntil(tok) {
		? -> sub(nodes << tok)
		_ -> {
			next() // consume the closing delimiter
			sub(nodes << {
				tag: tag
				children: parseText(range)
			})
		}
	}
	fn sub(nodes) if tok := next() {
		? -> nodes
		'_' -> handleDelimitedRange('_', :em, nodes, sub)
		'__' -> handleDelimitedRange('__', :strong, nodes, sub)
		'*' -> handleDelimitedRange('*', :em, nodes, sub)
		'**' -> handleDelimitedRange('**', :strong, nodes, sub)
		'`' -> handleDelimitedRange('`', :code, nodes, sub)
		'~' -> handleDelimitedRange('~', :strike, nodes, sub)
		// '[' opens either a checkbox ([x] / [ ]) or a link ([text](url))
		'[' -> if range := readUntilMatchingDelim('[') {
			? -> sub(nodes << tok)
			['x'] -> {
				next()
				sub(nodes << {
					tag: :checkbox
					checked: true
				})
			}
			[' '] -> {
				next()
				sub(nodes << {
					tag: :checkbox
					checked: false
				})
			}
			// consume the ']' then look at the following token for '('
			_ -> if c := (next(), next()) {
				'(' -> if urlRange := readUntilMatchingDelim(c) {
					? -> sub(nodes << tok + join(range) + ']' + c)
					_ -> {
						next() // consume ')'
						sub(nodes << {
							tag: :a
							href: join(urlRange)
							children: parseText(range)
						})
					}
				}
				? -> sub(nodes << tok + join(range) + ']')
				_ -> sub(nodes << tok + join(range) + ']' + c)
			}
		}
		// '!' followed by '[' begins an image; otherwise it stays literal
		'!' -> if peek() {
			'[' -> if range := (next(), readUntilMatchingDelim('[')) {
				? -> sub(nodes << tok + '[')
				['x'] -> {
					next()
					sub(nodes << tok << {
						tag: :checkbox
						checked: true
					})
				}
				[' '] -> {
					next()
					sub(nodes << tok << {
						tag: :checkbox
						checked: false
					})
				}
				_ -> if c := (next(), next()) {
					'(' -> if urlRange := readUntilMatchingDelim(c) {
						? -> sub(nodes << tok + '[' + join(range) + ']' + c)
						_ -> {
							next() // consume ')'
							sub(nodes << {
								tag: :img
								alt: join(range)
								src: join(urlRange)
							})
						}
					}
					? -> sub(nodes << tok + '[' + join(range) + ']')
					_ -> sub(nodes << tok + '[' + join(range) + ']' + c)
				}
			}
			_ -> sub(nodes << tok)
		}
		_ -> sub(nodes << tok)
	}
	sub([]) |> unifyTextNodes('')
}
// uListItemLine? reports whether `line` is an unordered list item:
// "- " after any leading indentation. A missing (?) line never is.
fn uListItemLine?(line) if line = ? {
	true -> false
	_ -> startsWith?(trimStart(line), '- ')
}
// oListItemLine? reports whether `line` is an ordered list item: optional
// indentation, then one or more digits, then ". ".
fn oListItemLine?(line) if line = ? {
	true -> false
	_ -> {
		trimmed := trimStart(line)
		dotIdx := trimmed |> indexOf('. ')
		if {
			dotIdx = -1, dotIdx = 0 -> false
			_ -> trimmed |> slice(0, dotIdx) |> split() |> every(digit?)
		}
	}
}
fn listItemLine?(line) uListItemLine?(line) | oListItemLine?(line)
// trimUListGetLevel consumes the leading indent and "- " marker from a
// Reader over an unordered-list-item line, returning the indent width as
// the item's nesting level.
fn trimUListGetLevel(reader) {
	indent := reader.readUntil('-')
	reader.next() // consume '-'
	reader.next() // consume the following space
	len(indent)
}
// trimOListGetLevel consumes the leading indent and "N. " marker from a
// Reader over an ordered-list-item line, returning the indent width as
// the item's nesting level.
fn trimOListGetLevel(reader) {
	peek := reader.peek
	next := reader.next
	// count and consume the leading whitespace run
	fn countIndent(n) if space?(peek()) {
		true -> {
			next()
			countIndent(n + 1)
		}
		_ -> n
	}
	level := countIndent(0)
	// skip the numeric label and its '.'
	reader.readUntil('.')
	next()
	// consume the space after '.' when present
	if peek() {
		' ' -> next()
	}
	level
}
// lineNodeType classifies one raw source line into the block-level node
// type whose parser should consume it. The conditions form an ordered
// first-match chain; lines matching nothing default to paragraph text.
fn lineNodeType(line) if {
	line = ? -> ?
	line = '' -> :empty
	startsWith?(line, '# ') -> :h1
	startsWith?(line, '## ') -> :h2
	startsWith?(line, '### ') -> :h3
	startsWith?(line, '#### ') -> :h4
	startsWith?(line, '##### ') -> :h5
	startsWith?(line, '###### ') -> :h6
	startsWith?(line, '>') -> :blockquote
	startsWith?(line, '```') -> :pre
	startsWith?(line, '---'), startsWith?(line, '***') -> :hr
	uListItemLine?(line) -> :ul
	oListItemLine?(line) -> :ol
	_ -> :p
}
fn parse(text) text |> split('\n') |> Reader() |> parseDoc()
// parseDoc classifies the next line from `lineReader` and dispatches to
// the matching block parser, accumulating nodes until the input (or, for
// a nested reader, the quoted region) ends.
fn parseDoc(lineReader) {
	fn loop(nodes) if nodeType := lineNodeType(lineReader.peek()) {
		:h1, :h2, :h3, :h4, :h5, :h6 -> loop(nodes << parseHeader(nodeType, lineReader))
		:blockquote -> loop(nodes << parseBlockQuote(lineReader))
		:pre -> loop(nodes << parseCodeBlock(lineReader))
		:ul, :ol -> loop(nodes << parseList(lineReader, nodeType))
		:rawHTML -> loop(nodes << parseRawHTML(lineReader))
		:p -> loop(nodes << parseParagraph(lineReader))
		// horizontal rules carry no content of their own
		:hr -> {
			lineReader.next()
			loop(nodes << { tag: :hr })
		}
		// blank lines separate blocks but produce no node
		:empty -> {
			lineReader.next()
			loop(nodes)
		}
		_ -> nodes
	}
	loop([])
}
// parseHeader consumes one heading line and produces an :h1–:h6 node whose
// children are the parsed inline text after the '#' marker.
fn parseHeader(nodeType, lineReader) {
	chars := Reader(lineReader.next())
	chars.readUntil(' ') // skip the run of '#'s
	chars.next() // skip the separating space
	rest := chars.readUntilEnd()
	{
		tag: nodeType
		children: rest |> tokenizeText() |> parseText()
	}
}
// parseBlockQuote parses a run of '>'-prefixed lines by wrapping the line
// reader in a view that strips one leading '>' from each quoted line, then
// recursively parsing that view as a full document.
fn parseBlockQuote(lineReader) {
	// BlockQuoteLineReader adapts `lineReader` to yield only the quoted
	// region: each accessor answers ? once the next line stops being a
	// blockquote line, and every yielded line loses its leading '>'.
	fn BlockQuoteLineReader(lineReader) {
		fn returnIfQuoted(line) if lineNodeType(line) {
			:blockquote -> line |> slice(1)
			_ -> ?
		}
		fn peek returnIfQuoted(lineReader.peek())
		fn last returnIfQuoted(lineReader.last())
		fn back lineReader.back()
		fn next if lineNodeType(lineReader.peek()) {
			:blockquote -> lineReader.next() |> trimStart('>')
			_ -> ?
		}
		fn expect? ?
		// fixed: was lineReader.readdUntil (typo), which would call a
		// missing key and fail at runtime
		fn readUntil(c) lineReader.readUntil('>' << c) |>
			with map() fn(line) line |> slice(1)
		// fixed: was '>' << c, referencing a name not in scope here;
		// the prefix being searched for is this function's argument
		fn readUntilPrefix(prefix) lineReader.readUntilPrefix('>' << prefix) |>
			with map() fn(line) line |> slice(1)
		fn readUntilMatchingDelim ?
		{
			peek: peek
			last: last
			back: back
			next: next
			expect?: expect?
			readUntil: readUntil
			readUntilPrefix: readUntilPrefix
			readUntilEnd: lineReader.readUntilEnd
			readUntilMatchingDelim: readUntilMatchingDelim
		}
	}
	{
		tag: :blockquote
		children: BlockQuoteLineReader(lineReader) |> parseDoc()
	}
}
// parseCodeBlock parses a fenced code block (``` ... ```), capturing an
// optional language tag from the opening fence and joining the body lines
// with newlines into a single :code child.
fn parseCodeBlock(lineReader) {
	peek := lineReader.peek
	next := lineReader.next
	openingFence := next()
	// anything after the three backticks names the language ('' if none)
	lang := openingFence |> slice(3)
	// collect body lines until the closing fence or end of input
	fn collect(lines) if lineNodeType(peek()) {
		:pre, ? -> lines
		_ -> collect(lines << next())
	}
	body := collect([])
	next() // consume the closing fence
	{
		tag: :pre
		children: [{
			tag: :code
			lang: lang
			children: unifyTextNodes(body, '\n')
		}]
	}
}
// parseRawHTML parses a '!html' block: the remainder of the marker line
// plus every following line up to the next blank line, kept verbatim.
fn parseRawHTML(lineReader) {
	peek := lineReader.peek
	next := lineReader.next
	markerLine := next()
	opening := markerLine |> slice(len('!html '))
	// collect lines until a blank line or end of input
	fn collect(lines) if lineNodeType(peek()) {
		:empty, ? -> lines
		_ -> collect(lines << next())
	}
	{
		tag: :rawHTML
		children: unifyTextNodes(collect([opening]), '\n')
	}
}
// parseList parses a run of list-item lines into a :ul or :ol node,
// recursing to build a nested list when the indent level increases, and
// handing control back to the caller (via lineReader.back()) when the
// level decreases or the list kind changes at the same level.
fn parseList(lineReader, listType) {
	peek := lineReader.peek
	next := lineReader.next
	fn sub(items) if listItemLine?(peek()) {
		false -> items
		_ -> {
			line := next()
			lineType := lineNodeType(line)
			reader := Reader(line)
			// pick the marker-trimmer matching this item's list kind
			trimmer := if lineType {
				:ul -> trimUListGetLevel
				:ol -> trimOListGetLevel
			}
			level := trimmer(reader)
			text := reader.readUntilEnd()
			listItem := {
				tag: :li
				level: level
				children: tokenizeText(text) |> parseText()
			}
			if lastItem := items.(len(items) - 1) {
				// first item of this (sub)list
				? -> sub(items << listItem)
				_ -> if {
					// same depth: continue only while the list kind
					// matches; otherwise rewind and close this list
					lastItem.level = level -> if lineType {
						listType -> sub(items << listItem)
						_ -> {
							lineReader.back()
							items
						}
					}
					// deeper: parse a nested list under the last item
					lastItem.level < level -> {
						lineReader.back()
						list := parseList(lineReader, lineType)
						lastItem.children << list
						sub(items)
					}
					// shallower: rewind and close this list
					_ -> {
						lineReader.back()
						items
					}
				}
			}
		}
	}
	children := sub([])
	// strip the scratch `level` key before emitting
	// NOTE(review): assumes assigning _ clears the key — confirm against
	// Oak object-assignment semantics
	children |> each(fn(child) if child.tag = :li -> child.level := _)
	{
		tag: listType
		children: children
	}
}
// parseParagraph parses consecutive :p lines into a single paragraph
// node. A line ending in two spaces or a trailing backslash yields a hard
// line break (:br); otherwise adjacent lines are joined with ' '.
fn parseParagraph(lineReader) {
	peek := lineReader.peek
	next := lineReader.next
	fn sub(lines) if lineNodeType(peek()) {
		:p -> {
			text := next()
			if [text |> endsWith?('  '), text.(len(text) - 1)] {
				// two trailing spaces (Markdown hard break): drop both
				// and emit :br. Fixed: was endsWith?(' ') (single space),
				// which triggered on any trailing space yet still
				// stripped two characters, eating the last real one.
				[true, _] -> {
					lines |> append(text |> take(len(text) - 2) |>
						tokenizeText() |> parseText())
					sub(lines << { tag: :br })
				}
				// trailing backslash: drop it and emit a hard break
				[_, '\\'] -> {
					lines |> append(text |> take(len(text) - 1) |>
						tokenizeText() |> parseText())
					sub(lines << { tag: :br })
				}
				_ -> sub(lines |> append(tokenizeText(text) |> parseText()))
			}
		}
		_ -> lines
	}
	{
		tag: :p
		children: sub([]) |> unifyTextNodes(' ')
	}
}
fn compile(nodes) nodes |> map(compileNode) |> join()
fn wrap(tag, node) '<' << tag << '>' << compile(node.children) << '</' << tag << '>'
// sanitizeAttr escapes characters that could break out of a quoted HTML
// attribute value. Fixed: the escape table had collapsed into identity
// mappings (the HTML entities were decoded away), disabling escaping.
fn sanitizeAttr(attr) {
	attr |> map(fn(c) if c {
		'<' -> '&lt;'
		'\'' -> '&#39;'
		'"' -> '&quot;'
		_ -> c
	})
}
// sanitizeURL guards href/src values: it rejects URLs whose scheme
// normalizes to javascript: or data: and attribute-escapes everything
// else. Normalization percent-decodes, drops every character other than
// word chars / '/' / ':', and lowercases, so encoded or space-padded
// scheme obfuscations still match the prefix checks.
fn sanitizeURL(url) {
	// normalized copy used only for the scheme check; the original url
	// is what gets emitted (escaped) on the allow path
	encodedURL := url |> percentDecode() |> map(fn(c) if {
		word?(c), c = '/', c = ':' -> c
		_ -> ''
	}) |> lower()
	if {
		encodedURL |> startsWith?('javascript:')
		encodedURL |> startsWith?('data:') -> ''
		_ -> sanitizeAttr(url)
	}
}
// compileNode renders a single AST node — a plain string or a tagged
// object — to its HTML string form.
fn compileNode(node) if type(node) {
	// plain text: escape the characters HTML treats specially in text
	// content. Fixed: these had collapsed into identity mappings (the
	// entities were decoded away), disabling text escaping entirely.
	:string -> node |> map(fn(c) if c {
		'&' -> '&amp;'
		'<' -> '&lt;'
		_ -> c
	})
	_ -> if node.tag {
		// simple container elements share the generic wrapper
		:p, :em, :strong, :strike
		:h1, :h2, :h3, :h4, :h5, :h6
		:pre, :ul, :ol, :li, :blockquote -> wrap(node.tag |> string(), node)
		:a -> '<a href="{{0}}">{{1}}</a>' |> format(sanitizeURL(node.href), compile(node.children))
		:img -> '<img alt="{{0}}" src="{{1}}"/>' |> format(sanitizeAttr(node.alt), sanitizeURL(node.src))
		// code blocks carry their language as a data attribute when set
		:code -> if node.lang {
			'', ? -> wrap('code', node)
			_ -> '<code data-lang="{{0}}">{{1}}</code>' |>
				format(sanitizeAttr(node.lang), compile(node.children))
		}
		:checkbox -> '<input type="checkbox" ' << if node.checked {
			true -> 'checked'
			_ -> ''
		} << '/>'
		:br -> '<br/>'
		:hr -> '<hr/>'
		// raw HTML blocks are emitted verbatim by design
		:rawHTML -> node.children.0
		// unknown nodes render a visible, text-escaped error marker
		_ -> '<span style="color:red">Unknown Markdown node {{0}}</span>' |> format(string(node) |> compileNode())
	}
}
fn transform(text) text |> parse() |> compile()