#!/usr/bin/env bash
# table of blocks and descriptions, to identify where a codepoint belongs
# Copyright (for organisation) © 2012, 2013 Ken Moffat,
# covered by the MIT license, http://opensource.org/licenses/MIT

# Thought to be accurate for unicode-6.2
# The names and ranges of the blocks are
# copyright 1991-2013 The Unicode Consortium
# http://www.unicode.org/copyright.html
#
# Note that ALL possible planes are listed - CODE2001 contains some
# private use codes in planes E and F.

# uses decimal for the maths

# Index of the last entry in the blockends / blocknames tables
# (entries are numbered 0..MAXBLOCK).
MAXBLOCK=250

# blockends[i] is the decimal value of the first codepoint of the block
# *after* block i, i.e. one past the last codepoint of block i.
# The values must be strictly increasing; block i therefore covers
# blockends[i-1] .. blockends[i]-1 (block 0 starts at 0).
# Presumably a consumer finds a codepoint's block by scanning for the first
# index whose value exceeds the codepoint - confirm against the caller.
blockends=(
[0]=128
[1]=256
[2]=384
[3]=592
[4]=688
[5]=768
[6]=880
[7]=1024
[8]=1280
[9]=1328
[10]=1424
[11]=1536
[12]=1792
[13]=1872
[14]=1920
[15]=1984
[16]=2048
[17]=2112
[18]=2144
[19]=2208
[20]=2304
[21]=2432
[22]=2560
[23]=2688
[24]=2816
[25]=2944
[26]=3072
[27]=3200
[28]=3328
[29]=3456
[30]=3584
[31]=3712
[32]=3840
[33]=4096
[34]=4256
[35]=4352
[36]=4608
[37]=4992
[38]=5024
[39]=5120
[40]=5760
[41]=5792
[42]=5888
[43]=5920
[44]=5952
[45]=5984
[46]=6016
[47]=6144
[48]=6320
[49]=6400
[50]=6480
[51]=6528
[52]=6624
[53]=6656
[54]=6688
[55]=6832
[56]=6912
[57]=7040
[58]=7104
[59]=7168
[60]=7248
[61]=7296
[62]=7360
[63]=7376
[64]=7424
[65]=7552
[66]=7616
[67]=7680
[68]=7936
[69]=8192
[70]=8304
[71]=8352
[72]=8400
[73]=8448
[74]=8528
[75]=8592
[76]=8704
[77]=8960
[78]=9216
[79]=9280 # U+2440, end of Control Pictures (was mistyped as 8280)
[80]=9312
[81]=9472
[82]=9600
[83]=9632
[84]=9728
[85]=9984
[86]=10176
[87]=10224
[88]=10240
[89]=10496
[90]=10624
[91]=10752
[92]=11008
[93]=11264
[94]=11360
[95]=11392
[96]=11520
[97]=11568
[98]=11648
[99]=11744
[100]=11776
[101]=11904
[102]=12032
[103]=12256
[104]=12272
[105]=12288
[106]=12352
[107]=12448
[108]=12544
[109]=12592
[110]=12688
[111]=12704
[112]=12736
[113]=12784
[114]=12800
[115]=13056
[116]=13312
[117]=19904
[118]=19968
[119]=40960
[120]=42128
[121]=42192
[122]=42240
[123]=42560
[124]=42656
[125]=42752
[126]=42784
[127]=43008
[128]=43056
[129]=43072
[130]=43136
[131]=43232
[132]=43264
[133]=43312
[134]=43360
[135]=43392
[136]=43488
[137]=43520
[138]=43616 # U+AA60, end of Cham (was mistyped as 43818)
[139]=43648
[140]=43744
[141]=43776
[142]=43824
[143]=43968
[144]=44032
[145]=55216
[146]=55296
[147]=56192
[148]=56320
[149]=57344
[150]=63744
[151]=64256
[152]=64336
[153]=65024
[154]=65040
[155]=65056
[156]=65072
[157]=65104
[158]=65136
[159]=65280
[160]=65520
[161]=65536
[162]=65664
[163]=65792
[164]=65856
[165]=65936
[166]=66000
[167]=66048
[168]=66176
[169]=66208
[170]=66272
[171]=66304
[172]=66352
[173]=66384
[174]=66432
[175]=66464
[176]=66528
[177]=66560
[178]=66640
[179]=66688
[180]=66736
[181]=67584
[182]=67648
[183]=67680
[184]=67840
[185]=67872
[186]=67904
[187]=67968
[188]=68000
[189]=68096
[190]=68192
[191]=68224
[192]=68352
[193]=68416
[194]=68448
[195]=68480
[196]=68608
[197]=68688
[198]=69216
[199]=69248
[200]=69632
[201]=69760
[202]=69840
[203]=69888
[204]=69968
[205]=70016
[206]=70112
[207]=71296
[208]=71376
[209]=73728
[210]=74752
[211]=74880
[212]=77824
[213]=78896
[214]=92160
[215]=92736
[216]=93952
[217]=94112
[218]=110592
[219]=110848
[220]=118784
[221]=119040
[222]=119296
[223]=119376
[224]=119552
[225]=119648
[226]=119680
[227]=119808
[228]=120832 # U+1D800, end of Mathematical Alphanumeric Symbols (was 120960 = U+1D880)
[229]=126464
[230]=126720
[231]=126976
[232]=127024
[233]=127136
[234]=127232
[235]=127488
[236]=127744
[237]=128512
[238]=128592
[239]=128640
[240]=128768
[241]=128896
[242]=131072
[243]=173824
[244]=177984
[245]=178208
[246]=194560
[247]=195104
[248]=196608
[249]=917504
[250]=1048576
)

# blocknames[i] is the human-readable description of block i, in the form
# "Block Name, U+FIRST-LAST" (hex codepoints). Indices match blockends.
# Ranges not allocated to any block are listed as "unassigned".
# The last two entries cover whole planes and use "U+X to U+Y" instead.
blocknames=(
[0]="C0 Controls and Basic Latin, U+0000-007F"
[1]="C1 Controls and Latin-1 Supplement, U+0080-00FF"
[2]="Latin Extended-A, U+0100-017F"
[3]="Latin Extended-B, U+0180-024F"
[4]="IPA Extensions, U+0250-02AF"
[5]="Spacing Modifier Letters, U+02B0-02FF"
[6]="Combining Diacritical Marks, U+0300-036F"
[7]="Greek and Coptic, U+0370-03FF"
[8]="Cyrillic, U+0400-04FF"
[9]="Cyrillic Supplement, U+0500-052F"
[10]="Armenian, U+0530-058F"
[11]="Hebrew, U+0590-05FF"
[12]="Arabic, U+0600-06FF"
[13]="Syriac, U+0700-074F"
[14]="Arabic Supplement, U+0750-077F"
[15]="Thaana, U+0780-07BF"
[16]="N'Ko, U+07C0-07FF"
[17]="Samaritan, U+0800-083F"
[18]="Mandaic, U+0840-085F"
[19]="unassigned, U+0860-089F"
[20]="Arabic Extended-A, U+08A0-08FF"
[21]="Devanagari, U+0900-097F"
[22]="Bengali, U+0980-09FF"
[23]="Gurmukhi, U+0A00-0A7F"
[24]="Gujarati, U+0A80-0AFF"
[25]="Oriya, U+0B00-0B7F"
[26]="Tamil, U+0B80-0BFF"
[27]="Telugu, U+0C00-0C7F"
[28]="Kannada, U+0C80-0CFF"
[29]="Malayalam, U+0D00-0D7F"
[30]="Sinhala, U+0D80-0DFF"
[31]="Thai, U+0E00-0E7F"
[32]="Lao, U+0E80-0EFF"
[33]="Tibetan, U+0F00-0FFF"
[34]="Myanmar, U+1000-109F"
[35]="Georgian, U+10A0-10FF"
[36]="Hangul Jamo, U+1100-11FF"
[37]="Ethiopic, U+1200-137F"
[38]="Ethiopic Supplement, U+1380-139F"
[39]="Cherokee, U+13A0-13FF"
[40]="Unified Canadian Aboriginal Syllabics, U+1400-167F"
[41]="Ogham, U+1680-169F"
[42]="Runic, U+16A0-16FF"
[43]="Tagalog, U+1700-171F"
[44]="Hanunoo, U+1720-173F"
[45]="Buhid, U+1740-175F"
[46]="Tagbanwa, U+1760-177F"
[47]="Khmer, U+1780-17FF"
[48]="Mongolian, U+1800-18AF"
[49]="Unified Canadian Aboriginal Syllabics Extended, U+18B0-18FF"
[50]="Limbu, U+1900-194F"
[51]="Tai Le, U+1950-197F"
[52]="New Tai Lue, U+1980-19DF"
[53]="Khmer Symbols, U+19E0-19FF"
[54]="Buginese, U+1A00-1A1F"
[55]="Tai Tham, U+1A20-1AAF"
[56]="unassigned, U+1AB0-1AFF"
[57]="Balinese, U+1B00-1B7F"
[58]="Sundanese, U+1B80-1BBF"
[59]="Batak, U+1BC0-1BFF"
[60]="Lepcha, U+1C00-1C4F"
[61]="Ol Chiki, U+1C50-1C7F"
[62]="unassigned, U+1C80-1CBF"
[63]="Sundanese Supplement, U+1CC0-1CCF"
[64]="Vedic Extensions, U+1CD0-1CFF"
[65]="Phonetic Extensions, U+1D00-1D7F"
[66]="Phonetic Extensions Supplement, U+1D80-1DBF"
[67]="Combining Diacritical Marks Supplement, U+1DC0-1DFF"
[68]="Latin Extended Additional, U+1E00-1EFF"
[69]="Greek Extended, U+1F00-1FFF"
[70]="General Punctuation, U+2000-206F"
[71]="Superscripts and Subscripts, U+2070-209F"
[72]="Currency Symbols, U+20A0-20CF"
[73]="Combining Diacritical Marks for Symbols, U+20D0-20FF"
[74]="Letterlike Symbols, U+2100-214F"
[75]="Number Forms, U+2150-218F"
[76]="Arrows, U+2190-21FF"
[77]="Mathematical Operators, U+2200-22FF"
[78]="Miscellaneous Technical, U+2300-23FF"
[79]="Control Pictures, U+2400-243F"
[80]="Optical Character Recognition, U+2440-245F"
[81]="Enclosed Alphanumerics, U+2460-24FF"
[82]="Box Drawing, U+2500-257F"
[83]="Block Elements, U+2580-259F"
[84]="Geometric Shapes, U+25A0-25FF"
[85]="Miscellaneous Symbols, U+2600-26FF"
[86]="Dingbats, U+2700-27BF"
[87]="Miscellaneous Mathematical Symbols-A, U+27C0-27EF"
[88]="Supplemental Arrows-A, U+27F0-27FF"
[89]="Braille Patterns, U+2800-28FF"
[90]="Supplemental Arrows-B, U+2900-297F"
[91]="Miscellaneous Mathematical Symbols-B, U+2980-29FF"
[92]="Supplemental Mathematical Operators, U+2A00-2AFF"
[93]="Miscellaneous Symbols and Arrows, U+2B00-2BFF"
[94]="Glagolitic, U+2C00-2C5F"
[95]="Latin Extended-C, U+2C60-2C7F"
[96]="Coptic, U+2C80-2CFF"
[97]="Georgian Supplement, U+2D00-2D2F"
[98]="Tifinagh, U+2D30-2D7F"
[99]="Ethiopic Extended, U+2D80-2DDF"
[100]="Cyrillic Extended-A, U+2DE0-2DFF"
[101]="Supplemental Punctuation, U+2E00-2E7F"
[102]="CJK Radicals Supplement, U+2E80-2EFF"
[103]="Kangxi Radicals, U+2F00-2FDF"
[104]="unassigned, U+2FE0-2FEF"
[105]="Ideographic Description Characters, U+2FF0-2FFF"
[106]="CJK Symbols and Punctuation, U+3000-303F"
[107]="Hiragana, U+3040-309F"
[108]="Katakana, U+30A0-30FF"
[109]="Bopomofo, U+3100-312F"
[110]="Hangul Compatibility Jamo, U+3130-318F"
[111]="Kanbun, U+3190-319F"
[112]="Bopomofo Extended, U+31A0-31BF"
[113]="CJK Strokes, U+31C0-31EF"
[114]="Katakana Phonetic Extensions, U+31F0-31FF"
[115]="Enclosed CJK Letters and Months, U+3200-32FF"
[116]="CJK Compatibility, U+3300-33FF"
[117]="CJK Unified Ideographs Extension A, U+3400-4DBF"
[118]="Yijing Hexagram Symbols, U+4DC0-4DFF"
[119]="CJK Unified Ideographs, U+4E00-9FFF"
[120]="Yi Syllables, U+A000-A48F"
[121]="Yi Radicals, U+A490-A4CF"
[122]="Lisu, U+A4D0-A4FF"
[123]="Vai, U+A500-A63F"
[124]="Cyrillic Extended-B, U+A640-A69F"
[125]="Bamum, U+A6A0-A6FF"
[126]="Modifier Tone Letters, U+A700-A71F"
[127]="Latin Extended-D, U+A720-A7FF"
[128]="Syloti Nagri, U+A800-A82F"
[129]="Common Indic Number Forms, U+A830-A83F"
[130]="Phags-pa, U+A840-A87F"
[131]="Saurashtra, U+A880-A8DF"
[132]="Devanagari Extended, U+A8E0-A8FF"
[133]="Kayah Li, U+A900-A92F"
[134]="Rejang, U+A930-A95F"
[135]="Hangul Jamo Extended-A, U+A960-A97F"
[136]="Javanese, U+A980-A9DF"
[137]="unassigned, U+A9E0-A9FF"
[138]="Cham, U+AA00-AA5F"
[139]="Myanmar Extended-A, U+AA60-AA7F"
[140]="Tai Viet, U+AA80-AADF"
[141]="Meetei Mayek Extensions, U+AAE0-AAFF"
[142]="Ethiopic Extended-A, U+AB00-AB2F"
[143]="unassigned, U+AB30-ABBF"
[144]="Meetei Mayek, U+ABC0-ABFF"
[145]="Hangul Syllables, U+AC00-D7AF"
[146]="Hangul Jamo Extended-B, U+D7B0-D7FF"
[147]="High Surrogates, U+D800-DB7F"
[148]="High Private Use Surrogates, U+DB80-DBFF"
[149]="Low Surrogates, U+DC00-DFFF"
[150]="Private Use Area, U+E000-F8FF"
[151]="CJK Compatibility Ideographs, U+F900-FAFF"
[152]="Alphabetic Presentation Forms, U+FB00-FB4F"
[153]="Arabic Presentation Forms-A, U+FB50-FDFF"
[154]="Variation Selectors, U+FE00-FE0F"
[155]="Vertical Forms, U+FE10-FE1F"
[156]="Combining Half Marks, U+FE20-FE2F"
[157]="CJK Compatibility Forms, U+FE30-FE4F"
[158]="Small Form Variants, U+FE50-FE6F"
[159]="Arabic Presentation Forms-B, U+FE70-FEFF"
[160]="Halfwidth and Fullwidth Forms, U+FF00-FFEF"
[161]="Specials, U+FFF0-FFFF"
[162]="Linear B Syllabary, U+10000-1007F"
[163]="Linear B Ideograms, U+10080-100FF"
[164]="Aegean Numbers, U+10100-1013F"
[165]="Ancient Greek Numbers, U+10140-1018F"
[166]="Ancient Symbols, U+10190-101CF"
[167]="Phaistos Disc, U+101D0-101FF"
[168]="unassigned, U+10200-1027F"
[169]="Lycian, U+10280-1029F"
[170]="Carian, U+102A0-102DF"
[171]="unassigned, U+102E0-102FF"
[172]="Old Italic, U+10300-1032F"
[173]="Gothic, U+10330-1034F"
[174]="unassigned, U+10350-1037F"
[175]="Ugaritic, U+10380-1039F"
[176]="Old Persian, U+103A0-103DF"
[177]="unassigned, U+103E0-103FF"
[178]="Deseret, U+10400-1044F"
[179]="Shavian, U+10450-1047F"
[180]="Osmanya, U+10480-104AF"
[181]="unassigned, U+104B0-107FF"
[182]="Cypriot Syllabary, U+10800-1083F"
[183]="Imperial Aramaic, U+10840-1085F"
[184]="unassigned, U+10860-108FF"
[185]="Phoenician, U+10900-1091F"
[186]="Lydian, U+10920-1093F"
[187]="unassigned, U+10940-1097F"
[188]="Meroitic Hieroglyphs, U+10980-1099F"
[189]="Meroitic Cursive, U+109A0-109FF"
[190]="Kharoshthi, U+10A00-10A5F"
[191]="Old South Arabian, U+10A60-10A7F"
[192]="unassigned, U+10A80-10AFF"
[193]="Avestan, U+10B00-10B3F"
[194]="Inscriptional Parthian, U+10B40-10B5F"
[195]="Inscriptional Pahlavi, U+10B60-10B7F"
[196]="unassigned, U+10B80-10BFF"
[197]="Old Turkic, U+10C00-10C4F"
[198]="unassigned, U+10C50-10E5F"
[199]="Rumi Numeral Symbols, U+10E60-10E7F"
[200]="unassigned, U+10E80-10FFF"
[201]="Brahmi, U+11000-1107F"
[202]="Kaithi, U+11080-110CF"
[203]="Sora Sompeng, U+110D0-110FF"
[204]="Chakma, U+11100-1114F"
[205]="unassigned, U+11150-1117F"
[206]="Sharada, U+11180-111DF"
[207]="unassigned, U+111E0-1167F"
[208]="Takri, U+11680-116CF"
[209]="unassigned, U+116D0-11FFF"
[210]="Cuneiform, U+12000-123FF"
[211]="Cuneiform Numbers and Punctuation, U+12400-1247F"
[212]="unassigned, U+12480-12FFF"
[213]="Egyptian Hieroglyphs, U+13000-1342F"
[214]="unassigned, U+13430-167FF"
[215]="Bamum Supplement, U+16800-16A3F"
[216]="unassigned, U+16A40-16EFF"
[217]="Miao, U+16F00-16F9F"
[218]="unassigned, U+16FA0-1AFFF"
[219]="Kana Supplement, U+1B000-1B0FF"
[220]="unassigned, U+1B100-1CFFF"
[221]="Byzantine Musical Symbols, U+1D000-1D0FF"
[222]="Musical Symbols, U+1D100-1D1FF"
[223]="Ancient Greek Musical Notation, U+1D200-1D24F"
[224]="unassigned, U+1D250-1D2FF"
[225]="Tai Xuan Jing Symbols, U+1D300-1D35F"
[226]="Counting Rod Numerals, U+1D360-1D37F"
[227]="unassigned, U+1D380-1D3FF"
[228]="Mathematical Alphanumeric Symbols, U+1D400-1D7FF"
[229]="unassigned, U+1D800-1EDFF"
[230]="Arabic Mathematical Alphabetic Symbols, U+1EE00-1EEFF"
[231]="unassigned, U+1EF00-1EFFF"
[232]="Mahjong Tiles, U+1F000-1F02F"
[233]="Domino Tiles, U+1F030-1F09F"
[234]="Playing Cards, U+1F0A0-1F0FF"
[235]="Enclosed Alphanumeric Supplement, U+1F100-1F1FF"
[236]="Enclosed Ideographic Supplement, U+1F200-1F2FF"
[237]="Miscellaneous Symbols and Pictographs, U+1F300-1F5FF"
[238]="Emoticons, U+1F600-1F64F"
[239]="unassigned, U+1F650-1F67F"
[240]="Transport and Map Symbols, U+1F680-1F6FF"
[241]="Alchemical Symbols, U+1F700-1F77F"
[242]="unassigned, U+1F780-1FFFF"
[243]="CJK Unified Ideographs Extension B, U+20000-2A6DF"
[244]="CJK Unified Ideographs Extension C, U+2A700-2B73F"
[245]="CJK Unified Ideographs Extension D, U+2B740-2B81F"
[246]="unassigned, U+2B820-2F7FF"
[247]="CJK Compatibility Ideographs Supplement, U+2F800-2FA1F"
[248]="unassigned, U+2FA20-2FFFF"
[249]="unexpected plane, U+30000 to U+DFFFF"
[250]="private use planes, U+E0000 to U+FFFFF"
)

