IB
Size: a a a
IB
A
%% Splits the UTF-8 binary Text at the given offsets and returns the list of
%% resulting segments in order.
%%
%% Offsets in SplitPoints are counted in UTF-16 code units (a character that
%% needs a surrogate pair in UTF-16 counts as two) and must be given in
%% non-decreasing order. N split points always produce N + 1 segments; a
%% repeated point produces an empty segment.
%%
%% Raises error:invalid_utf16_offset when a point falls inside a surrogate
%% pair, is smaller than a preceding point, or lies beyond the end of Text.
%% Text that is not valid UTF-8 fails with function_clause.
%%
%% Used like this:
%%   utf_split([1,5,18], Str).
utf_split(SplitPoints, Text) ->
    utf_split(SplitPoints, [<<>>], 0, Text).

%% utf_split(RemainingPoints, Segments, Pos, RestOfText)
%% Segments is kept in reverse order with the segment under construction at
%% its head; Pos is the number of UTF-16 code units consumed so far.
utf_split([], [<<>> | Acc], _, Text) ->
    %% No points left: the freshly opened (still empty) segment becomes
    %% whatever text remains.
    lists:reverse([Text | Acc]);
utf_split([Point | SplitPoints], Acc, Point, Text) ->
    %% Exactly on a split point: open a new empty segment.
    utf_split(SplitPoints, [<<>> | Acc], Point, Text);
utf_split([Point | _], _, Pos, _) when Pos > Point ->
    %% We stepped over the point, so it was inside a surrogate pair or out
    %% of order.
    error(invalid_utf16_offset);
utf_split([_ | _], _, _, <<>>) ->
    %% Text ran out before the next point was reached: the offset is past
    %% the end of the text.
    error(invalid_utf16_offset);
utf_split(SplitPoints, [CurrentAcc | Acc], Pos, <<Char/utf8, Rest/binary>>) ->
    %% Width of this character in UTF-16 code units (1, or 2 for a
    %% surrogate pair); integer division avoids a float round-trip.
    Size = byte_size(<<Char/utf16>>) div 2,
    utf_split(SplitPoints, [<<CurrentAcc/binary, Char/utf8>> | Acc], Pos + Size, Rest).
A
IB
A
%% Splits the UTF-8 binary Text at the given offsets and returns the list of
%% resulting segments in order.
%%
%% Offsets in SplitPoints are counted in UTF-16 code units (a character that
%% needs a surrogate pair in UTF-16 counts as two) and must be given in
%% non-decreasing order. N split points always produce N + 1 segments; a
%% repeated point produces an empty segment.
%%
%% Raises error:invalid_utf16_offset when a point falls inside a surrogate
%% pair, is smaller than a preceding point, or lies beyond the end of Text.
%% Text that is not valid UTF-8 fails with function_clause.
%%
%% Used like this:
%%   utf_split([1,5,18], Str).
utf_split(SplitPoints, Text) ->
    utf_split(SplitPoints, [<<>>], 0, Text).

%% utf_split(RemainingPoints, Segments, Pos, RestOfText)
%% Segments is kept in reverse order with the segment under construction at
%% its head; Pos is the number of UTF-16 code units consumed so far.
utf_split([], [<<>> | Acc], _, Text) ->
    %% No points left: the freshly opened (still empty) segment becomes
    %% whatever text remains.
    lists:reverse([Text | Acc]);
utf_split([Point | SplitPoints], Acc, Point, Text) ->
    %% Exactly on a split point: open a new empty segment.
    utf_split(SplitPoints, [<<>> | Acc], Point, Text);
utf_split([Point | _], _, Pos, _) when Pos > Point ->
    %% We stepped over the point, so it was inside a surrogate pair or out
    %% of order.
    error(invalid_utf16_offset);
utf_split([_ | _], _, _, <<>>) ->
    %% Text ran out before the next point was reached: the offset is past
    %% the end of the text.
    error(invalid_utf16_offset);
utf_split(SplitPoints, [CurrentAcc | Acc], Pos, <<Char/utf8, Rest/binary>>) ->
    %% Width of this character in UTF-16 code units (1, or 2 for a
    %% surrogate pair); integer division avoids a float round-trip.
    Size = byte_size(<<Char/utf16>>) div 2,
    utf_split(SplitPoints, [<<CurrentAcc/binary, Char/utf8>> | Acc], Pos + Size, Rest).
%% Build one [Start, End] pair of split offsets per entity. Each entity is
%% searched with lists:keyfind/3 for tuples keyed `offset` and `length`.
%% NOTE(review): presumably these are UTF-16 code unit offsets, since
%% utf_split/2 raises invalid_utf16_offset on bad points -- confirm against
%% whatever produces Entities.
SplitPoints =
lists:map(
fun(Entity) ->
%% lists:keyfind/3 returns false when the key is absent, so a missing
%% `offset` or `length` fails these matches with badmatch.
{_, Offset} = lists:keyfind(offset, 1, Entity),
{_, Length} = lists:keyfind(length, 1, Entity),
[Offset, Offset + Length]
end, Entities ),
%% Flatten the pairs into a single list of points, in Entities order, and
%% split Text at each of them. utf_split/2 errors when a point is smaller
%% than the position already reached, so overlapping or unsorted entities
%% will fail here.
SplitText = utf_split(lists:flatten(SplitPoints), Text),
МБ
NP
ŹR
NP
ŹR
NP
ŹR
ŹR
NP
ŹR
NP
NP
ŹR
NP