/**
As of 20130620 th1ish supports UTF8 in identifiers.
It is, however, not very predictable when given
incompatible encodings (it will assume it is UTF8
and will do the wrong thing). Give it valid UTF8
with no funny stuff and it should Do The Right
Thing.
*/
/**
Exercises non-ASCII identifiers together with the string methods
charAt, toUtf8Char, concat and substr. Each call form (bracketed
[obj.method args] vs. parenthesised obj.method(args)) is used on
purpose so that both syntaxes get covered.
*/
$print {Demonstrating UTF8 identifiers and strings...}
scope {
const äbä = "this is äbä"
// 228 is 0xE4, the code point of "ä". Passing true as the second
// charAt argument yields that integer rather than a string.
// Offset 10 only lands on the final "ä" if offsets are counted in
// characters, not bytes.
assert 228 === [äbä.charAt 10 true]
// Same check, chained: charAt on the one-character result of charAt.
assert 228 === [[äbä.charAt 10].charAt 0 true]
assert 228 === ["ä".charAt 0 true]
// Without the second argument charAt yields the character as a string.
assert "ä" === [äbä.charAt 10]
// toUtf8Char turns the integer code point back into the character.
assert "ä" === [["ä".charAt 0 true].toUtf8Char]
// Symbols outside the ASCII range are used as identifier names here.
const © = "Copyright"
const ® = "Registered Trademark"
// NOTE(review): nameof presumably evaluates to the identifier's own
// name as a string - confirm against the th1ish docs.
const ©® = ["".concat © '(' nameof © ') and ' ® ' of nobody in particular']
$print {Ah, so there IS another use for the nameof keyword...}
$print nameof äbä '=' äbä
$print nameof © '=' ©
$print nameof ® '=' ®
$print nameof ©® '=' ©®
// Round trip (character -> code point -> character) for "®", first
// with the bracketed call syntax...
assert "®" === [["®".charAt 0 true].toUtf8Char]
assert "®" === [["©®".charAt 1 true].toUtf8Char]
// ...and then with the parenthesised call syntax.
assert "®" === "©®".charAt(1,true).toUtf8Char()
// substr(offset, length): per these expectations both arguments
// are counted in characters.
assert "ä" === "äbä".substr(0,1)
assert "©" === "äb©®ä".substr(2,1)
assert "©®" === "äb©®ä".substr(2,2)
}
// Case conversion (toUpper/toLower) on strings containing
// non-ASCII characters.
scope {
/**
th1ish implements the whole published table of "one to one"
to-upper/lower mappings. It does NOT do any of the special
cases (e.g. German sharp S).
*/
var low = "äb"
var up = "ÄB"
// Conversion in both directions...
assert [low.toUpper] === up
assert [up.toLower] === low
// ...and converting to the case a string already has leaves it
// unchanged.
assert [low.toLower] === low
assert [up.toUpper] === up
//$print low low.toUpper() up up.toLower()
// The expected values keep "©" and "®" as they are; only the
// letters change case.
assert "ÄB©®©Ä" === "äb©®©ä".toUpper()
assert "äb©®©ä" === "ÄB©®©Ä".toLower()
var x = [0xdF.toUtf8Char] // German "sharp S"
// Sharp S is one of the special cases mentioned above, so it is
// expected to come back unchanged from both conversions.
assert x === [x.toUpper]
assert x === [x.toLower]
}
// indexOf on strings with multi-byte characters: the expected
// results below are character offsets, not byte offsets.
scope {
assert 1 === "äbä".indexOf("b")
// The search is case-sensitive: "Ä" is found at 2, not at the
// lower-case "ä" at 0.
assert 2 === "äbÄ".indexOf("Ä")
// A needle which does not occur yields a negative result.
assert 0 > "äbä".indexOf("c")
// Multi-character needle.
assert 1 === "äbÄxx".indexOf("bÄ")
assert 0 === "äbÄxx".indexOf("ä")
assert 2 === "äbÄxx".indexOf("Ä")
assert 2 === "äb©®ä".indexOf("©")
// Second argument: offset at which the search starts, so the "©"
// at offset 2 is skipped here.
assert 4 === "äb©®©ä".indexOf("©",3)
// The next three expectations are consistent with a negative start
// offset being counted back from the end of the 6-character string
// (-2 => 4, -1 => 5).
assert 4 === "äb©®©ä".indexOf("©",-2)
assert 0 > "äb©®©ä".indexOf("©",-1)
assert 5 === "äb©®©ä".indexOf("ä",-1)
}
// Heredocs (<<<ID content ID) whose identifier consists of
// non-ASCII characters. The identifier starts with a Cyrillic
// letter while the similar-looking word inside the content starts
// with a Latin one, so the two are different byte sequences.
scope {
assert 'Н' !== 'H' // weird Cyrillic Н !== normal Western H
const str = {Weird HËŘĖĎŐĊ identifier}
// The heredoc value must equal the brace-quoted string above, i.e.
// the whitespace separating the content from the opening and
// closing identifier is not part of the value. Checked both in
// parentheses and as a bare operand.
assert str === (<<<НËŘĖĎŐĊ Weird HËŘĖĎŐĊ identifier НËŘĖĎŐĊ)
assert str === <<<НËŘĖĎŐĊ Weird HËŘĖĎŐĊ identifier НËŘĖĎŐĊ
// Making sure that the end-of-heredoc handling doesn't
// have any EOF-related mis-handling vis-a-vis sub-parsed
// constructs:
// (the heredoc is the last thing inside (...), on a $print line,
// and inside [...], respectively)
print(<<<НËŘĖĎŐĊ Weird HËŘĖĎŐĊ identifier НËŘĖĎŐĊ)
$print <<<НËŘĖĎŐĊ Weird HËŘĖĎŐĊ identifier НËŘĖĎŐĊ
[print <<<НËŘĖĎŐĊ Weird HËŘĖĎŐĊ identifier НËŘĖĎŐĊ]
}
// \u escape sequences in double-quoted strings.
scope {
// Leading zeros are optional: 2 and 4 hex digits give the same
// character.
assert "\uE4" === "\u00E4"
// The hex digits are matched case-insensitively. Of the three
// candidate hex characters only two are consumed (see the
// even-count rule below), so the trailing "b" stays literal.
assert "äbäb" == "\ue4b\uE4b"
// 8 hex digits with leading zeros vs. the 4-digit form.
assert "\u000003d0" === "\u03d0"
assert "E4" === "\u454" // only parses (2,4,6,8) hex chars
assert "\u0003d0ab" === "\u03d0ab" // oops: i wanted {03d0}{ab}
// Brace-quoted strings keep the backslash sequence as literal text.
assert {\u0003d0} === "\\u0003d0"
// Maybe we need a \U to explicitly end such sequences?
// "Z" is not a hex digit, so evaluating this string is expected to
// raise an exception, which catch hands back as a value.
const ex = catch{"\uZ3"}
// NOTE(review): the brace-quoted text on the right of && looks
// like an in-code remark rather than a real condition; presumably
// a non-empty string counts as true, so the assertion hinges on
// ex alone - confirm.
assert ex && {The error reporting for these is
not yet all that helpful because the bits which do this
don't have enough context to report the error location
and those which have it can't see exactly where it
happened.}
}
// length() called directly on brace-quoted strings and on
// heredocs, including the empty case of each.
scope {
assert 1 === {1}.length()
// Prints the length of the empty brace-quoted string before
// asserting on it.
print({}.length())
assert 0 === {}.length()
// Heredoc with the one-character content "a": the whitespace
// around the content does not count towards the length.
assert 1 === [<<<X a X.length]
// Heredoc with no content at all between the two identifiers.
assert 0 === [<<<X X.length]
}