libfossil: 000-018.th1ish at [0ab8c75c49]

File th1ish/unit/000-018.th1ish artifact c40bfb8243 part of check-in 0ab8c75c49

/**
    As of 20130620 th1ish supports UTF8 in identifiers.
    It is, however, not very predictable when given
    incompatible encodings (it will assume it is UTF8
    and will do the wrong thing). Give it valid UTF8
    with no funny stuff and it should Do The Right
    Thing.
*/
// Emits a banner naming this group of tests.
$print {Demonstrating UTF8 identifiers and strings...}

/**
    UTF8 basics: multi-byte characters used as identifier names
    and inside string literals. The charAt and substr assertions
    below only hold if positions are counted in characters, not
    in bytes.
*/
scope {
    const äbä = "this is äbä"
    // Character index 10 is the final ä. With true as the second
    // argument charAt yields the integer code point (228 is 0xE4)
    // rather than a one-character string.
    assert 228 === [äbä.charAt 10 true]
    assert 228 === [[äbä.charAt 10].charAt 0 true]
    assert 228 === ["ä".charAt 0 true]
    // Without the second argument the result is the character itself.
    assert "ä" === [äbä.charAt 10]
    // toUtf8Char turns an integer code point back into a string.
    assert "ä" === [["ä".charAt 0 true].toUtf8Char]
    // Non-alphabetic symbols as identifiers, alone and combined.
    const © = "Copyright"
    const ® = "Registered Trademark"
    const ©® = ["".concat © '(' nameof © ') and ' ® ' of nobody in particular']
    $print {Ah, so there IS another use for the nameof keyword...}
    // The printed output is not asserted. Presumably nameof yields
    // the name of the identifier as a string - TODO confirm.
    $print nameof äbä '=' äbä
    $print nameof © '=' ©
    $print nameof ® '='  ®
    $print nameof ©® '=' ©®
    // Round trip through the code point for a different character,
    // including one which is not at index 0.
    assert "®" === [["®".charAt 0 true].toUtf8Char]
    assert "®" === [["©®".charAt 1 true].toUtf8Char]
    // Same check as the previous line, written with the
    // parenthesised method-call syntax.
    assert "®" === "©®".charAt(1,true).toUtf8Char()
    // substr takes a character offset and a character count.
    assert "ä" === "äbä".substr(0,1)
    assert "©" === "äb©®ä".substr(2,1)
    assert "©®" === "äb©®ä".substr(2,2)
}

/**
    Case conversion: toUpper and toLower on strings which mix
    ASCII letters, accented letters and caseless symbols.
*/
scope {

    /**
        th1ish implements the whole published table of "one to one"
        to-upper/lower mappings. It does NOT do any of the special
        cases (e.g. German sharp S).
    */
    var low = "äb"
    var up =  "ÄB"
    // Converting in either direction gives the other string...
    assert [low.toUpper] === up
    assert [up.toLower] === low
    // ...and converting to the case a string already has leaves
    // it unchanged.
    assert [low.toLower] === low
    assert [up.toUpper] === up
    //$print low low.toUpper() up up.toLower()

    // The symbols © and ® come through both conversions unchanged.
    assert "ÄB©®©Ä" === "äb©®©ä".toUpper()
    assert "äb©®©ä" === "ÄB©®©Ä".toLower()

    // 0xdF is the code point of the sharp S. It is one of the
    // special cases mentioned above, so neither conversion is
    // expected to change it.
    var x = [0xdF.toUtf8Char] // German "sharp S"
    assert x === [x.toUpper]
    assert x === [x.toLower]
}

/**
    indexOf on UTF8 strings. The expected values below are
    character positions, not byte offsets.
*/
scope {
    assert 1 === "äbä".indexOf("b")
    // Upper-case Ä is distinct from lower-case ä, so the match is
    // at index 2 rather than 0.
    assert 2 === "äbÄ".indexOf("Ä")
    // A needle which does not occur gives a negative result.
    assert 0 > "äbä".indexOf("c")
    // Multi-character needle.
    assert 1 === "äbÄxx".indexOf("bÄ")
    assert 0 === "äbÄxx".indexOf("ä")
    assert 2 === "äbÄxx".indexOf("Ä")
    assert 2 === "äb©®ä".indexOf("©")
    // The optional second argument is the position to start
    // searching from: starting at 3 skips the © at index 2.
    assert 4 === "äb©®©ä".indexOf("©",3)
    // Judging by the expected results, a negative start position
    // counts back from the end of the string: -2 starts at index 4
    // and -1 at index 5 of this six-character string.
    assert 4 === "äb©®©ä".indexOf("©",-2)
    assert 0 > "äb©®©ä".indexOf("©",-1)
    assert 5 === "äb©®©ä".indexOf("ä",-1)
}

/**
    Heredocs delimited by a UTF8 identifier. The delimiter starts
    with the Cyrillic letter which looks like H, while the text
    inside the heredoc contains a lookalike word starting with the
    Western H. The lookalike must not be taken for the closing
    delimiter.
*/
scope {
    assert 'Н' !== 'H' // weird Cyrillic Н !== normal Western H
    const str = {Weird HËŘĖĎŐĊ identifier}
    // The heredoc body equals the brace-quoted string above, i.e.
    // without the whitespace next to the delimiters. Checked both
    // inside parentheses and as the last token of the line.
    assert str === (<<<НËŘĖĎŐĊ Weird HËŘĖĎŐĊ identifier НËŘĖĎŐĊ)
    assert str === <<<НËŘĖĎŐĊ Weird HËŘĖĎŐĊ identifier НËŘĖĎŐĊ

    // Making sure that the end-of-heredoc handling doesn't
    // have any EOF-related mis-handling vis-a-vis sub-parsed
    // constructs:
    // The same heredoc as a parenthesised call argument, as a
    // command-style argument and inside a bracketed call. The last
    // form also has extra whitespace after the opening delimiter.
    print(<<<НËŘĖĎŐĊ Weird HËŘĖĎŐĊ identifier НËŘĖĎŐĊ)
    $print <<<НËŘĖĎŐĊ Weird HËŘĖĎŐĊ identifier НËŘĖĎŐĊ
    [print <<<НËŘĖĎŐĊ   Weird HËŘĖĎŐĊ identifier НËŘĖĎŐĊ]
}

/**
    Backslash-u escapes in double-quoted strings. As the
    assertions below show, an escape consumes an even number of
    hex digits (2, 4, 6 or 8), taking as many as are available.
*/
scope {
    // Two-digit and four-digit forms name the same character.
    assert "\uE4" === "\u00E4"
    // Hex digits are case-insensitive. In each escape here three
    // hex digits follow the u, so only two are consumed and the b
    // stays a literal character.
    // NOTE(review): this is the only assertion in the file using
    // == rather than === - confirm whether that is intentional.
    assert "äbäb" == "\ue4b\uE4b"
    // Leading zeros in the eight-digit form do not change the value.
    assert "\u000003d0" === "\u03d0"
    assert "E4" === "\u454" // only parses (2,4,6,8) hex chars
    assert "\u0003d0ab" === "\u03d0ab" // oops: i wanted {03d0}{ab}
    // Brace-quoted strings are not unescaped: the backslash and
    // the characters after it are kept literally.
    assert {\u0003d0} === "\\u0003d0"
    // Maybe we need a \U to explicitly end such sequences?
    // Z is not a hex digit. The result of the catch expression is
    // asserted to be truthy, so evaluating the string is expected
    // to raise an error. The braced text after && is a non-empty
    // string literal which doubles as an inline note - presumably
    // it is always truthy and does not affect the outcome.
    const ex = catch{"\uZ3"}
    assert ex && {The error reporting for these is
        not yet all that helpful because the bits which do this
        don't have enough context to report the error location
        and those which have it can't see exactly where it
        happened.}
}

/**
    Length of brace-quoted and heredoc strings, including the
    empty cases.
*/
scope {
    assert 1 === {1}.length()
    // Prints the length of an empty brace string. The value itself
    // is asserted on the next line, so this looks like leftover
    // debugging output - TODO confirm.
    print({}.length())
    assert 0 === {}.length()
    // Whitespace between the heredoc delimiters and the content is
    // not counted: a single character gives length 1 and a heredoc
    // holding only whitespace gives length 0.
    assert 1 === [<<<X a X.length]
    assert 0 === [<<<X  X.length]
}