#!/usr/bin/env bash
# table of blocks and descriptions, to identify where a codepoint belongs
# Copyright (for organisation) © 2012, 2013 Ken Moffat,
# covered by the MIT license, http://opensource.org/licenses/MIT

# Thought to be accurate for unicode-8.0
# The names and ranges of the blocks are
# copyright 1991-2015 The Unicode Consortium
# http://www.unicode.org/copyright.html
#
# Note that planes 0 through 15 (hex F) are all listed - CODE2001 contains
# some private use codes in planes E and F.  Plane 16 (U+100000-10FFFF) is
# NOT covered: the last table entry ends at U+FFFFF.

# uses decimal for the maths

# Index of the last entry in the blockends and blocknames tables in this
# file (both are indexed 0..MAXBLOCK).  Keep it in step with those tables
# whenever a block is added or removed.
MAXBLOCK=302

# Exclusive upper bound of each block, in decimal: blockends[N] is the
# first codepoint of the block that follows block N.  The values are
# positional - the k-th value (counting from 0) is blockends[k] - and are
# laid out ten per row, so row R holds indices 10*R .. 10*R+9.
blockends=(
  # 0-9
  128 256 384 592 688 768 880 1024 1280 1328
  # 10-19
  1424 1536 1792 1872 1920 1984 2048 2112 2144 2208
  # 20-29
  2304 2432 2560 2688 2816 2944 3072 3200 3328 3456
  # 30-39
  3584 3712 3840 4096 4256 4352 4608 4992 5024 5120
  # 40-49
  5760 5792 5888 5920 5952 5984 6016 6144 6320 6400
  # 50-59
  6480 6528 6624 6656 6688 6832 6912 7040 7104 7168
  # 60-69
  7248 7296 7360 7376 7424 7552 7616 7680 7936 8192
  # 70-79
  8304 8352 8400 8448 8528 8592 8704 8960 9216 9280
  # 80-89
  9312 9472 9600 9632 9728 9984 10176 10224 10240 10496
  # 90-99
  10624 10752 11008 11264 11360 11392 11520 11568 11648 11744
  # 100-109
  11776 11904 12032 12256 12272 12288 12352 12448 12544 12592
  # 110-119
  12688 12704 12736 12784 12800 13056 13312 19904 19968 40960
  # 120-129
  42128 42192 42240 42560 42656 42752 42784 43008 43056 43072
  # 130-139
  43136 43232 43264 43312 43360 43392 43488 43520 43616 43648
  # 140-149
  43744 43776 43824 43888 43968 44032 55216 55296 56192 56320
  # 150-159
  57344 63744 64256 64336 65024 65040 65056 65072 65104 65136
  # 160-169
  65280 65520 65536 65664 65792 65856 65936 66000 66048 66176
  # 170-179
  66208 66272 66304 66352 66384 66432 66464 66528 66560 66640
  # 180-189
  66688 66736 66816 66864 66928 67072 67456 67584 67648 67680
  # 190-199
  67712 67760 67808 67840 67872 67904 67968 68000 68096 68192
  # 200-209
  68224 68256 68288 68352 68416 68448 68480 68528 68608 68688
  # 210-219
  68736 68864 69216 69248 69632 69760 69840 69888 69968 70016
  # 220-229
  70112 70144 70224 70272 70320 70400 70528 70784 70880 71040
  # 230-239
  71168 71264 71296 71376 71424 71488 71840 71936 72384 72448
  # 240-249
  73728 74752 74880 75088 77824 78896 82944 83584 92160 92736
  # 250-259
  92784 92880 92928 93072 93952 94112 110592 110848 113664 113824
  # 260-269
  113840 118784 119040 119296 119376 119552 119648 119680 119808 120832
  # 270-279
  121520 124928 125152 126464 126720 126976 127024 127136 127232 127488
  # 280-289
  127744 128512 128592 128640 128768 128896 129024 129280 129536 131072
  # 290-299
  173824 177984 178208 183984 194560 195104 196608 917504 917632 917760
  # 300-302
  918000 983040 1048576
)

# Human-readable label for each block, indexed in parallel with blockends.
# Every entry has the form "Name, U+FIRST-LAST" with FIRST and LAST in
# upper-case hex; gaps between named blocks are listed as "unassigned".
# Except for entry 290 (see the note there), LAST is one less than the
# corresponding blockends value and FIRST follows on from the previous entry.
blocknames=(
[0]="Basic Latin, U+0000-007F"
[1]="Latin-1 Supplement, U+0080-00FF"
[2]="Latin Extended-A, U+0100-017F"
[3]="Latin Extended-B, U+0180-024F"
[4]="IPA Extensions, U+0250-02AF"
[5]="Spacing Modifier Letters, U+02B0-02FF"
[6]="Combining Diacritical Marks, U+0300-036F"
[7]="Greek and Coptic, U+0370-03FF"
[8]="Cyrillic, U+0400-04FF"
[9]="Cyrillic Supplement, U+0500-052F"
[10]="Armenian, U+0530-058F"
[11]="Hebrew, U+0590-05FF"
[12]="Arabic, U+0600-06FF"
[13]="Syriac, U+0700-074F"
[14]="Arabic Supplement, U+0750-077F"
[15]="Thaana, U+0780-07BF"
[16]="NKo, U+07C0-07FF"
[17]="Samaritan, U+0800-083F"
[18]="Mandaic, U+0840-085F"
[19]="unassigned, U+0860-089F"
[20]="Arabic Extended-A, U+08A0-08FF"
[21]="Devanagari, U+0900-097F"
[22]="Bengali, U+0980-09FF"
[23]="Gurmukhi, U+0A00-0A7F"
[24]="Gujarati, U+0A80-0AFF"
[25]="Oriya, U+0B00-0B7F"
[26]="Tamil, U+0B80-0BFF"
[27]="Telugu, U+0C00-0C7F"
[28]="Kannada, U+0C80-0CFF"
[29]="Malayalam, U+0D00-0D7F"
[30]="Sinhala, U+0D80-0DFF"
[31]="Thai, U+0E00-0E7F"
[32]="Lao, U+0E80-0EFF"
[33]="Tibetan, U+0F00-0FFF"
[34]="Myanmar, U+1000-109F"
[35]="Georgian, U+10A0-10FF"
[36]="Hangul Jamo, U+1100-11FF"
[37]="Ethiopic, U+1200-137F"
[38]="Ethiopic Supplement, U+1380-139F"
[39]="Cherokee, U+13A0-13FF"
[40]="Unified Canadian Aboriginal Syllabics, U+1400-167F"
[41]="Ogham, U+1680-169F"
[42]="Runic, U+16A0-16FF"
[43]="Tagalog, U+1700-171F"
[44]="Hanunoo, U+1720-173F"
[45]="Buhid, U+1740-175F"
[46]="Tagbanwa, U+1760-177F"
[47]="Khmer, U+1780-17FF"
[48]="Mongolian, U+1800-18AF"
[49]="Unified Canadian Aboriginal Syllabics Extended, U+18B0-18FF"
[50]="Limbu, U+1900-194F"
[51]="Tai Le, U+1950-197F"
[52]="New Tai Lue, U+1980-19DF"
[53]="Khmer Symbols, U+19E0-19FF"
[54]="Buginese, U+1A00-1A1F"
[55]="Tai Tham, U+1A20-1AAF"
[56]="Combining Diacritical Marks Extended, U+1AB0-1AFF"
[57]="Balinese, U+1B00-1B7F"
[58]="Sundanese, U+1B80-1BBF"
[59]="Batak, U+1BC0-1BFF"
[60]="Lepcha, U+1C00-1C4F"
[61]="Ol Chiki, U+1C50-1C7F"
[62]="unassigned, U+1C80-1CBF"
[63]="Sundanese Supplement, U+1CC0-1CCF"
[64]="Vedic Extensions, U+1CD0-1CFF"
[65]="Phonetic Extensions, U+1D00-1D7F"
[66]="Phonetic Extensions Supplement, U+1D80-1DBF"
[67]="Combining Diacritical Marks Supplement, U+1DC0-1DFF"
[68]="Latin Extended Additional, U+1E00-1EFF"
[69]="Greek Extended, U+1F00-1FFF"
[70]="General Punctuation, U+2000-206F"
[71]="Superscripts and Subscripts, U+2070-209F"
[72]="Currency Symbols, U+20A0-20CF"
[73]="Combining Diacritical Marks for Symbols, U+20D0-20FF"
[74]="Letterlike Symbols, U+2100-214F"
[75]="Number Forms, U+2150-218F"
[76]="Arrows, U+2190-21FF"
[77]="Mathematical Operators, U+2200-22FF"
[78]="Miscellaneous Technical, U+2300-23FF"
[79]="Control Pictures, U+2400-243F"
[80]="Optical Character Recognition, U+2440-245F"
[81]="Enclosed Alphanumerics, U+2460-24FF"
[82]="Box Drawing, U+2500-257F"
[83]="Block Elements, U+2580-259F"
[84]="Geometric Shapes, U+25A0-25FF"
[85]="Miscellaneous Symbols, U+2600-26FF"
[86]="Dingbats, U+2700-27BF"
[87]="Miscellaneous Mathematical Symbols-A, U+27C0-27EF"
[88]="Supplemental Arrows-A, U+27F0-27FF"
[89]="Braille Patterns, U+2800-28FF"
[90]="Supplemental Arrows-B, U+2900-297F"
[91]="Miscellaneous Mathematical Symbols-B, U+2980-29FF"
[92]="Supplemental Mathematical Operators, U+2A00-2AFF"
[93]="Miscellaneous Symbols and Arrows, U+2B00-2BFF"
[94]="Glagolitic, U+2C00-2C5F"
[95]="Latin Extended-C, U+2C60-2C7F"
[96]="Coptic, U+2C80-2CFF"
[97]="Georgian Supplement, U+2D00-2D2F"
[98]="Tifinagh, U+2D30-2D7F"
[99]="Ethiopic Extended, U+2D80-2DDF"
[100]="Cyrillic Extended-A, U+2DE0-2DFF"
[101]="Supplemental Punctuation, U+2E00-2E7F"
[102]="CJK Radicals Supplement, U+2E80-2EFF"
[103]="Kangxi Radicals, U+2F00-2FDF"
[104]="unassigned, U+2FE0-2FEF"
[105]="Ideographic Description Characters, U+2FF0-2FFF"
[106]="CJK Symbols and Punctuation, U+3000-303F"
[107]="Hiragana, U+3040-309F"
[108]="Katakana, U+30A0-30FF"
[109]="Bopomofo, U+3100-312F"
[110]="Hangul Compatibility Jamo, U+3130-318F"
[111]="Kanbun, U+3190-319F"
[112]="Bopomofo Extended, U+31A0-31BF"
[113]="CJK Strokes, U+31C0-31EF"
[114]="Katakana Phonetic Extensions, U+31F0-31FF"
[115]="Enclosed CJK Letters and Months, U+3200-32FF"
[116]="CJK Compatibility, U+3300-33FF"
[117]="CJK Unified Ideographs Extension A, U+3400-4DBF"
[118]="Yijing Hexagram Symbols, U+4DC0-4DFF"
[119]="CJK Unified Ideographs, U+4E00-9FFF"
[120]="Yi Syllables, U+A000-A48F"
[121]="Yi Radicals, U+A490-A4CF"
[122]="Lisu, U+A4D0-A4FF"
[123]="Vai, U+A500-A63F"
[124]="Cyrillic Extended-B, U+A640-A69F"
[125]="Bamum, U+A6A0-A6FF"
[126]="Modifier Tone Letters, U+A700-A71F"
[127]="Latin Extended-D, U+A720-A7FF"
[128]="Syloti Nagri, U+A800-A82F"
[129]="Common Indic Number Forms, U+A830-A83F"
[130]="Phags-pa, U+A840-A87F"
[131]="Saurashtra, U+A880-A8DF"
[132]="Devanagari Extended, U+A8E0-A8FF"
[133]="Kayah Li, U+A900-A92F"
[134]="Rejang, U+A930-A95F"
[135]="Hangul Jamo Extended-A, U+A960-A97F"
[136]="Javanese, U+A980-A9DF"
[137]="Myanmar Extended-B, U+A9E0-A9FF"
[138]="Cham, U+AA00-AA5F"
[139]="Myanmar Extended-A, U+AA60-AA7F"
[140]="Tai Viet, U+AA80-AADF"
[141]="Meetei Mayek Extensions, U+AAE0-AAFF"
[142]="Ethiopic Extended-A, U+AB00-AB2F"
[143]="Latin Extended-E, U+AB30-AB6F"
[144]="Cherokee Supplement, U+AB70-ABBF"
[145]="Meetei Mayek, U+ABC0-ABFF"
[146]="Hangul Syllables, U+AC00-D7AF"
[147]="Hangul Jamo Extended-B, U+D7B0-D7FF"
[148]="High Surrogates, U+D800-DB7F"
[149]="High Private Use Surrogates, U+DB80-DBFF"
[150]="Low Surrogates, U+DC00-DFFF"
[151]="Private Use Area, U+E000-F8FF"
[152]="CJK Compatibility Ideographs, U+F900-FAFF"
[153]="Alphabetic Presentation Forms, U+FB00-FB4F"
[154]="Arabic Presentation Forms-A, U+FB50-FDFF"
[155]="Variation Selectors, U+FE00-FE0F"
[156]="Vertical Forms, U+FE10-FE1F"
[157]="Combining Half Marks, U+FE20-FE2F"
[158]="CJK Compatibility Forms, U+FE30-FE4F"
[159]="Small Form Variants, U+FE50-FE6F"
[160]="Arabic Presentation Forms-B, U+FE70-FEFF"
[161]="Halfwidth and Fullwidth Forms, U+FF00-FFEF"
[162]="Specials, U+FFF0-FFFF"
[163]="Linear B Syllabary, U+10000-1007F"
[164]="Linear B Ideograms, U+10080-100FF"
[165]="Aegean Numbers, U+10100-1013F"
[166]="Ancient Greek Numbers, U+10140-1018F"
[167]="Ancient Symbols, U+10190-101CF"
[168]="Phaistos Disc, U+101D0-101FF"
[169]="unassigned, U+10200-1027F"
[170]="Lycian, U+10280-1029F"
[171]="Carian, U+102A0-102DF"
[172]="Coptic Epact Numbers, U+102E0-102FF"
[173]="Old Italic, U+10300-1032F"
[174]="Gothic, U+10330-1034F"
[175]="Old Permic, U+10350-1037F"
[176]="Ugaritic, U+10380-1039F"
[177]="Old Persian, U+103A0-103DF"
[178]="unassigned, U+103E0-103FF"
[179]="Deseret, U+10400-1044F"
[180]="Shavian, U+10450-1047F"
[181]="Osmanya, U+10480-104AF"
[182]="unassigned, U+104B0-104FF"
[183]="Elbasan, U+10500-1052F"
[184]="Caucasian Albanian, U+10530-1056F"
[185]="unassigned, U+10570-105FF"
[186]="Linear A, U+10600-1077F"
[187]="unassigned, U+10780-107FF"
[188]="Cypriot Syllabary, U+10800-1083F"
[189]="Imperial Aramaic, U+10840-1085F"
[190]="Palmyrene, U+10860-1087F"
[191]="Nabataean, U+10880-108AF"
[192]="unassigned, U+108B0-108DF"
[193]="Hatran, U+108E0-108FF"
[194]="Phoenician, U+10900-1091F"
[195]="Lydian, U+10920-1093F"
[196]="unassigned, U+10940-1097F"
[197]="Meroitic Hieroglyphs, U+10980-1099F"
[198]="Meroitic Cursive, U+109A0-109FF"
[199]="Kharoshthi, U+10A00-10A5F"
[200]="Old South Arabian, U+10A60-10A7F"
[201]="Old North Arabian, U+10A80-10A9F"
[202]="unassigned, U+10AA0-10ABF"
[203]="Manichaean, U+10AC0-10AFF"
[204]="Avestan, U+10B00-10B3F"
[205]="Inscriptional Parthian, U+10B40-10B5F"
[206]="Inscriptional Pahlavi, U+10B60-10B7F"
[207]="Psalter Pahlavi, U+10B80-10BAF"
[208]="unassigned, U+10BB0-10BFF"
[209]="Old Turkic, U+10C00-10C4F"
[210]="unassigned, U+10C50-10C7F"
[211]="Old Hungarian, U+10C80-10CFF"
[212]="unassigned, U+10D00-10E5F"
[213]="Rumi Numeral Symbols, U+10E60-10E7F"
[214]="unassigned, U+10E80-10FFF"
[215]="Brahmi, U+11000-1107F"
[216]="Kaithi, U+11080-110CF"
[217]="Sora Sompeng, U+110D0-110FF"
[218]="Chakma, U+11100-1114F"
[219]="Mahajani, U+11150-1117F"
[220]="Sharada, U+11180-111DF"
[221]="Sinhala Archaic Numbers, U+111E0-111FF"
[222]="Khojki, U+11200-1124F"
[223]="unassigned, U+11250-1127F"
[224]="Multani, U+11280-112AF"
[225]="Khudawadi, U+112B0-112FF"
[226]="Grantha, U+11300-1137F"
[227]="unassigned, U+11380-1147F"
[228]="Tirhuta, U+11480-114DF"
[229]="unassigned, U+114E0-1157F"
[230]="Siddham, U+11580-115FF"
[231]="Modi, U+11600-1165F"
[232]="unassigned, U+11660-1167F"
[233]="Takri, U+11680-116CF"
[234]="unassigned, U+116D0-116FF"
[235]="Ahom, U+11700-1173F"
[236]="unassigned, U+11740-1189F"
[237]="Warang Citi, U+118A0-118FF"
[238]="unassigned, U+11900-11ABF"
[239]="Pau Cin Hau, U+11AC0-11AFF"
[240]="unassigned, U+11B00-11FFF"
[241]="Cuneiform, U+12000-123FF"
[242]="Cuneiform Numbers and Punctuation, U+12400-1247F"
[243]="Early Dynastic Cuneiform, U+12480-1254F"
[244]="unassigned, U+12550-12FFF"
[245]="Egyptian Hieroglyphs, U+13000-1342F"
[246]="unassigned, U+13430-143FF"
[247]="Anatolian Hieroglyphs, U+14400-1467F"
[248]="unassigned, U+14680-167FF"
[249]="Bamum Supplement, U+16800-16A3F"
[250]="Mro, U+16A40-16A6F"
[251]="unassigned, U+16A70-16ACF"
[252]="Bassa Vah, U+16AD0-16AFF"
[253]="Pahawh Hmong, U+16B00-16B8F"
[254]="unassigned, U+16B90-16EFF"
[255]="Miao, U+16F00-16F9F"
[256]="unassigned, U+16FA0-1AFFF"
[257]="Kana Supplement, U+1B000-1B0FF"
[258]="unassigned, U+1B100-1BBFF"
[259]="Duployan, U+1BC00-1BC9F"
[260]="Shorthand Format Controls, U+1BCA0-1BCAF"
[261]="unassigned, U+1BCB0-1CFFF"
[262]="Byzantine Musical Symbols, U+1D000-1D0FF"
[263]="Musical Symbols, U+1D100-1D1FF"
[264]="Ancient Greek Musical Notation, U+1D200-1D24F"
[265]="unassigned, U+1D250-1D2FF"
[266]="Tai Xuan Jing Symbols, U+1D300-1D35F"
[267]="Counting Rod Numerals, U+1D360-1D37F"
[268]="unassigned, U+1D380-1D3FF"
[269]="Mathematical Alphanumeric Symbols, U+1D400-1D7FF"
[270]="Sutton SignWriting, U+1D800-1DAAF"
[271]="unassigned, U+1DAB0-1E7FF"
[272]="Mende Kikakui, U+1E800-1E8DF"
[273]="unassigned, U+1E8E0-1EDFF"
[274]="Arabic Mathematical Alphabetic Symbols, U+1EE00-1EEFF"
[275]="unassigned, U+1EF00-1EFFF"
[276]="Mahjong Tiles, U+1F000-1F02F"
[277]="Domino Tiles, U+1F030-1F09F"
[278]="Playing Cards, U+1F0A0-1F0FF"
[279]="Enclosed Alphanumeric Supplement, U+1F100-1F1FF"
[280]="Enclosed Ideographic Supplement, U+1F200-1F2FF"
[281]="Miscellaneous Symbols and Pictographs, U+1F300-1F5FF"
[282]="Emoticons, U+1F600-1F64F"
[283]="Ornamental Dingbats, U+1F650-1F67F"
[284]="Transport and Map Symbols, U+1F680-1F6FF"
[285]="Alchemical Symbols, U+1F700-1F77F"
[286]="Geometric Shapes Extended, U+1F780-1F7FF"
[287]="Supplemental Arrows-C, U+1F800-1F8FF"
[288]="Supplemental Symbols and Pictographs, U+1F900-1F9FF"
[289]="unassigned, U+1FA00-1FFFF"
# NOTE: blockends[290] is U+2A700, so the gap U+2A6E0-2A6FF has no entry
# of its own here and a lookup in that gap lands on Extension B.
[290]="CJK Unified Ideographs Extension B, U+20000-2A6DF"
[291]="CJK Unified Ideographs Extension C, U+2A700-2B73F"
[292]="CJK Unified Ideographs Extension D, U+2B740-2B81F"
[293]="CJK Unified Ideographs Extension E, U+2B820-2CEAF"
[294]="unassigned, U+2CEB0-2F7FF"
[295]="CJK Compatibility Ideographs Supplement, U+2F800-2FA1F"
[296]="unassigned, U+2FA20-2FFFF"
[297]="unexpected plane, U+30000-DFFFF"
[298]="Tags, U+E0000-E007F"
[299]="unassigned, U+E0080-E00FF"
[300]="Variation Selectors Supplement, U+E0100-E01EF"
[301]="unassigned, U+E01F0-EFFFF"
[302]="Supplementary Private Use Area-A, U+F0000-FFFFF"
)

