Oak

str.oak

← Standard library See on GitHub ↗

// libstr is the core string library for Oak.
//
// It provides a set of utility functions for working with strings and data
// encoded in strings in Oak programs.

{
	default: default
	slice: slice
	take: take
	takeLast: takeLast
	reduce: reduce
} := import('std')

// checkRange builds a predicate for the inclusive codepoint range [lo, hi].
// The returned function `checker` takes a char and reports whether that
// char's codepoint lies within the range.
fn checkRange(lo, hi) {
	fn checker(c) {
		point := codepoint(c)
		point >= lo & point <= hi
	}
}

// upper? reports whether the char `c` lies between 'A' and 'Z' inclusive,
// i.e. whether it is an uppercase ASCII letter.
fn upper?(c) 'A' <= c & c <= 'Z'
// lower? reports whether the char `c` lies between 'a' and 'z' inclusive,
// i.e. whether it is a lowercase ASCII letter.
fn lower?(c) 'a' <= c & c <= 'z'
// digit? reports whether the char `c` lies between '0' and '9' inclusive,
// i.e. whether it is an ASCII digit.
fn digit?(c) '0' <= c & c <= '9'
// space? reports whether `c` is one of the ASCII whitespace characters:
// space, tab, newline, carriage return, or form feed. Any other value
// (including a null from an out-of-range index) yields false.
fn space?(c) if c {
	' ' -> true
	'\t' -> true
	'\n' -> true
	'\r' -> true
	'\f' -> true
	_ -> false
}
// letter? reports whether the char `c` is an ASCII letter of either case.
fn letter?(c) if upper?(c) {
	true -> true
	_ -> lower?(c)
}
// word? reports whether the char `c` is an ASCII letter or an ASCII digit.
fn word?(c) if letter?(c) {
	true -> true
	_ -> digit?(c)
}

// join glues a list of strings into one string, placing `joiner` between each
// adjacent pair. When `joiner` is omitted, the empty string is used. An empty
// list produces the empty string.
fn join(strings, joiner) if len(strings) {
	0 -> ''
	_ -> {
		glue := default(joiner, '')
		rest := slice(strings, 1)
		// fold the remaining strings onto the first one, left to right
		reduce(rest, strings.0, fn(joined, next) joined + glue + next)
	}
}

// startsWith? reports whether the first len(prefix) characters of `s` equal
// `prefix`.
fn startsWith?(s, prefix) take(s, len(prefix)) = prefix

// endsWith? reports whether the last len(suffix) characters of `s` equal
// `suffix`.
fn endsWith?(s, suffix) takeLast(s, len(suffix)) = suffix

// _matchesAt? is an internal helper reporting whether `substr` occurs in `s`
// starting exactly at index `idx`. It compares character by character in
// place instead of slicing out a copy of `s` to compare against.
//
// An empty `substr` matches at any index. A single-character `substr` is
// compared directly against s.(idx).
fn _matchesAt?(s, substr, idx) if sublen := len(substr) {
	0 -> true
	1 -> s.(idx) = substr
	_ -> {
		// walk both strings in lockstep until a mismatch or the end of substr
		fn scan(offset) if offset = sublen {
			true -> true
			_ -> if s.(idx + offset) = substr.(offset) {
				true -> scan(offset + 1)
				_ -> false
			}
		}
		scan(0)
	}
}

// indexOf scans `s` from the left and returns the first index at which
// `substr` occurs, or -1 when `substr` does not occur in `s`.
fn indexOf(s, substr) {
	// last index at which substr could still fit inside s
	last := len(s) - len(substr)
	fn search(at) if {
		_matchesAt?(s, substr, at) -> at
		at < last -> search(at + 1)
		_ -> -1
	}
	search(0)
}

// rindexOf scans `s` from the right and returns the last index at which
// `substr` occurs, or -1 when `substr` does not occur in `s`.
fn rindexOf(s, substr) {
	fn search(at) if {
		_matchesAt?(s, substr, at) -> at
		at > 0 -> search(at - 1)
		_ -> -1
	}
	// start at the right-most index where substr could still fit
	search(len(s) - len(substr))
}

// contains? reports whether `substr` occurs anywhere within the string `s`.
fn contains?(s, substr) {
	pos := indexOf(s, substr)
	pos >= 0
}

// cut splits `s` at the first occurrence of the separator `sep`, at most
// once. When `sep` is found, the result is the list [before, after] holding
// the text on either side of the separator. When `sep` is not found, the
// result is [s, ''].
fn cut(s, sep) {
	at := indexOf(s, sep)
	if at = -1 {
		true -> [s, '']
		_ -> {
			before := slice(s, 0, at)
			after := slice(s, at + len(sep))
			[before, after]
		}
	}
}

// lower returns a new string in which every uppercase ASCII letter of `s` is
// replaced by its lowercase counterpart; other characters are copied as-is.
fn lower(s) {
	// ASCII lowercase letters sit 32 codepoints above their uppercase forms
	fn downcase(c) if upper?(c) {
		true -> char(codepoint(c) + 32)
		_ -> c
	}
	reduce(s, '', fn(out, c) out << downcase(c))
}

// upper returns a new string in which every lowercase ASCII letter of `s` is
// replaced by its uppercase counterpart; other characters are copied as-is.
fn upper(s) {
	// ASCII uppercase letters sit 32 codepoints below their lowercase forms
	fn upcase(c) if lower?(c) {
		true -> char(codepoint(c) - 32)
		_ -> c
	}
	reduce(s, '', fn(out, c) out << upcase(c))
}

// _replaceNonEmpty is the worker behind replace for a non-empty `old`. It
// walks the string left to right; on each match of `old` it splices `new` in
// and resumes just past the inserted text, so text introduced by `new` is
// never itself re-scanned.
fn _replaceNonEmpty(s, old, new) {
	oldLen := len(old)
	newLen := len(new)
	fn walk(text, pos) if {
		_matchesAt?(text, old, pos) -> walk(
			slice(text, 0, pos) + new + slice(text, pos + oldLen)
			pos + newLen
		)
		pos < len(text) -> walk(text, pos + 1)
		_ -> text
	}
	walk(s, 0)
}

// replace returns a string in which all occurrences of the substring `old`
// in `s` have been replaced by `new`. If `old` is the empty string, `s` is
// returned unchanged.
fn replace(s, old, new) if old = '' {
	true -> s
	_ -> _replaceNonEmpty(s, old, new)
}

// _splitNonEmpty is the worker behind split for a non-empty `sep`. It scans
// `s` left to right, collecting the text between consecutive separators, and
// returns the collected list once the final piece has been appended.
fn _splitNonEmpty(s, sep) {
	parts := []
	sepLen := len(sep)
	// `pos` is the scan cursor; `start` marks where the current piece began
	fn scan(pos, start) if {
		_matchesAt?(s, sep, pos) -> {
			parts << slice(s, start, pos)
			next := pos + sepLen
			scan(next, next)
		}
		pos < len(s) -> scan(pos + 1, start)
		_ -> parts << slice(s, start)
	}
	scan(0, 0)
}

// split breaks the string `s` apart at every occurrence of the substring
// `sep` and returns the pieces as a list of strings. When `sep` is omitted or
// is the empty string, split returns a list of the individual characters of
// `s`, in order.
fn split(s, sep) if sep = ? | sep = '' {
	true -> reduce(s, [], fn(chars, c) chars << c)
	_ -> _splitNonEmpty(s, sep)
}

// helper: repeat the string `pad` until it reaches exactly `n` characters.
//
// The result is `int(n / len(pad))` whole copies of `pad` followed by the
// first `n % len(pad)` characters of `pad`.
//
// If `pad` is the empty string there is nothing to repeat, so the empty
// string is returned. Without this guard the division and modulo below would
// divide by zero.
fn _extend(pad, n) if len(pad) {
	0 -> ''
	_ -> {
		times := int(n / len(pad))
		part := n % len(pad)

		// append `times` whole copies, then the partial leftover copy
		fn sub(base, i) if i {
			0 -> base << slice(pad, 0, part)
			_ -> sub(base << pad, i - 1)
		}
		sub('', times)
	}
}

// padStart prepends repetitions of `pad` (the last one possibly cut short) to
// the string `s` so that the result is exactly `n` characters long. If
// len(s) >= n already, it returns `s` itself.
fn padStart(s, n, pad) if len(s) >= n {
	false -> {
		filler := _extend(pad, n - len(s))
		filler << s
	}
	_ -> s
}

// padEnd appends repetitions of `pad` (the last one possibly cut short) to
// the string `s` so that the result is exactly `n` characters long. If
// len(s) >= n already, it returns `s` itself.
fn padEnd(s, n, pad) if len(s) >= n {
	false -> {
		filler := _extend(pad, n - len(s))
		// use + rather than << so the caller's `s` is not modified in place
		s + filler
	}
	_ -> s
}

// _trimStartSpace returns `s` with its leading run of whitespace (as judged
// by space?) removed.
fn _trimStartSpace(s) {
	// advance past whitespace; the scan stops at the first char for which
	// space? is false, which includes the null read past the end of s
	fn skip(pos) if {
		space?(s.(pos)) -> skip(pos + 1)
		_ -> pos
	}
	s |> slice(skip(0))
}

// _trimStartNonEmpty returns `s` with every consecutive leading copy of the
// non-empty string `prefix` removed.
fn _trimStartNonEmpty(s, prefix) {
	total := len(s)
	step := len(prefix)
	// hop forward one prefix-length at a time while the prefix keeps matching
	fn skip(pos) if {
		pos >= total -> pos
		_matchesAt?(s, prefix, pos) -> skip(pos + step)
		_ -> pos
	}
	s |> slice(skip(0))
}

// trimStart removes any (potentially repeated) occurrences of the string
// `prefix` from the beginning of the string `s`. If `prefix` is not
// specified, leading whitespace is removed instead. If `prefix` is the empty
// string, `s` is returned unchanged.
fn trimStart(s, prefix) if {
	prefix = ? -> _trimStartSpace(s)
	prefix = '' -> s
	_ -> _trimStartNonEmpty(s, prefix)
}

// _trimEndSpace returns `s` with its trailing run of whitespace (as judged by
// space?) removed.
fn _trimEndSpace(s) {
	// step backwards from the last char until space? is false
	fn back(pos) if {
		space?(s.(pos)) -> back(pos - 1)
		_ -> pos
	}
	s |> slice(0, back(len(s) - 1) + 1)
}

// _trimEndNonEmpty returns `s` with every consecutive trailing copy of the
// non-empty string `suffix` removed.
fn _trimEndNonEmpty(s, suffix) {
	step := len(suffix)
	// `end` is the exclusive end of the text kept so far; hop backwards one
	// suffix-length at a time while the suffix keeps matching just before it
	fn back(end) if {
		end <= -1 -> end
		_matchesAt?(s, suffix, end - step) -> back(end - step)
		_ -> end
	}
	s |> slice(0, back(len(s)))
}

// trimEnd removes any (potentially repeated) occurrences of the string
// `suffix` from the end of the string `s`. If `suffix` is not specified,
// trailing whitespace is removed instead. If `suffix` is the empty string,
// `s` is returned unchanged.
fn trimEnd(s, suffix) if {
	suffix = ? -> _trimEndSpace(s)
	suffix = '' -> s
	_ -> _trimEndNonEmpty(s, suffix)
}

// trim removes any (potentially repeated) occurrences of the string `part`
// from both ends of the string `s`, first from the start and then from the
// end. If `part` is not specified, trim removes all whitespace from both ends
// of `s`.
fn trim(s, part) trimEnd(trimStart(s, part), part)