REPORT-110010 删除lucene里的py腳本

1 year ago · 9156f4439f
7 changed files with 0 additions and 2367 deletions
--- a/fine-lucene/src/main/java/com/fr/third/org/apache/lucene/analysis/charfilter/htmlentity.py
+++ b/fine-lucene/src/main/java/com/fr/third/org/apache/lucene/analysis/charfilter/htmlentity.py
@@ -1,539 +0,0 @@
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
 # The ASF licenses this file to You under the Apache License, Version 2.0
 # (the "License"); you may not use this file except in compliance with
 # the License.  You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import re
 # A simple python script to generate an HTML entity map and a regex alternation
 # for inclusion in HTMLStripCharFilter.jflex.
 def _wrap_entries(first_prefix, continuation_prefix, entries, width=80):
  """Pack string fragments into output lines shorter than ``width`` characters.
  The first line starts with ``first_prefix``. Before a fragment is appended,
  if the current line plus that fragment would reach ``width`` characters or
  more, the current line is finished and a new one is started with
  ``continuation_prefix``. Returns the list of lines; the last element is the
  (possibly partial) line still being built, so the list is never empty.
  """
  lines = []
  current = first_prefix
  for entry in entries:
    if len(current) + len(entry) >= width:
      lines.append(current)
      current = continuation_prefix
    current += entry
  lines.append(current)
  return lines
 def main():
  """Print the generated HTML entity definitions to standard output.
  The output consists of the Apache license header, a ``CharacterEntities``
  regex alternation listing every entity name found in get_entity_text(),
  and a ``%{ ... %}`` section of Java code that fills the
  ``upperCaseVariantsAccepted`` and ``entityValues`` maps. Only
  single-argument ``print(...)`` calls are used, which behave identically
  under the Python 2 print statement and the Python 3 print function.
  """
  print(get_apache_license())
  # Map each entity name to the text of its Java character literal.
  codes = {}
  regex = re.compile(r'\s*<!ENTITY\s+(\S+)\s+"&(?:#38;)?#(\d+);"')
  for line in get_entity_text().split('\n'):
    match = regex.match(line)
    if match:
      key = match.group(1)
      if key == 'quot':
        # A double quote must be escaped inside the generated Java string.
        codes[key] = r'\"'
      elif key == 'nbsp':
        # nbsp is emitted as a plain space rather than as \u00A0.
        codes[key] = ' '
      else:
        codes[key] = r'\u%04X' % int(match.group(2))
  keys = sorted(codes)
  # Entities whose all-uppercase spelling is listed as well.
  upper_case_variants = ('quot', 'copy', 'gt', 'lt', 'reg', 'amp')
  alternatives = []
  for key in keys:
    # Every alternative except the very first is preceded by ' | '.
    alternatives.append(('"%s"' if not alternatives else ' | "%s"') % key)
    if key in upper_case_variants:
      alternatives.append(' | "%s"' % key.upper())
  alternation_lines = _wrap_entries('CharacterEntities = ( ',
                                    '                   ',
                                    alternatives)
  alternation_lines[-1] += ' )'
  for line in alternation_lines:
    print(line)
  print('%{')
  print('  private static final Map<String,String> upperCaseVariantsAccepted')
  print('      = new HashMap<String,String>();')
  print('  static {')
  for key in upper_case_variants:
    print('    upperCaseVariantsAccepted.put("%s", "%s");' % (key, key.upper()))
  print('  }')
  print('  private static final CharArrayMap<Character> entityValues')
  print('      = new CharArrayMap<Character>(Version.LUCENE_40, %i, false);' % len(keys))
  print('  static {')
  print('    String[] entities = {')
  pairs = [' "%s", "%s",' % (key, codes[key]) for key in keys]
  entity_lines = _wrap_entries('     ', '     ', pairs)
  # Drop the final character of the last line (the comma after the last pair).
  entity_lines[-1] = entity_lines[-1][:-1]
  for line in entity_lines:
    print(line)
  print('    };')
  print('    for (int i = 0 ; i < entities.length ; i += 2) {')
  print('      Character value = entities[i + 1].charAt(0);')
  print('      entityValues.put(entities[i], value);')
  print('      String upperCaseVariant = upperCaseVariantsAccepted.get(entities[i]);')
  print('      if (upperCaseVariant != null) {')
  print('        entityValues.put(upperCaseVariant, value);')
  print('      }')
  print('    }')
  print("  }")
  print("%}")
 def get_entity_text():
  """Return the embedded W3C character-entity DTD text, unmodified.
  The text contains the ``<!ENTITY name "&#NNN;" >`` declarations of the
  xhtml-lat1.ent, xhtml-special.ent and xhtml-symbol.ent sections, together
  with their surrounding prose and comments.
  """
  # The original note here says the text is taken verbatim from
  # <http://www.w3.org/TR/REC-html40/sgml/entities.html>.
  # NOTE(review): the text itself is headed "XHTML Character Entities" and
  # points at the REC-xhtml-modularization DTD files -- confirm the source.
  return r"""
 F.1. XHTML Character Entities
 XHTML DTDs make available a standard collection of named character entities. Those entities are defined in this section.
 F.1.1. XHTML Latin 1 Character Entities
 You can download this version of this file from http://www.w3.org/TR/2010/REC-xhtml-modularization/DTD/xhtml-lat1.ent. The latest version is available at http://www.w3.org/MarkUp/DTD/xhtml-lat1.ent.
 <!-- ...................................................................... -->
 <!-- XML-compatible ISO Latin 1 Character Entity Set for XHTML ............ -->
 <!-- file: xhtml-lat1.ent
     Typical invocation:
       <!ENTITY % xhtml-lat1
           PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN"
                  "xhtml-lat1.ent" >
       %xhtml-lat1;
     This DTD module is identified by the PUBLIC and SYSTEM identifiers:
       PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN"
       SYSTEM "http://www.w3.org/MarkUp/DTD/xhtml-lat1.ent"
     Revision:  $Id: xhtml-lat1.ent,v 4.1 2001/04/10 09:34:14 altheim Exp $ SMI
     Portions (C) International Organization for Standardization 1986:
     Permission to copy in any form is granted for use with conforming
     SGML systems and applications as defined in ISO 8879, provided
     this notice is included in all copies.
 -->
 <!ENTITY nbsp   "&#160;" ><!-- no-break space = non-breaking space, U+00A0 ISOnum -->
 <!ENTITY iexcl  "&#161;" ><!-- inverted exclamation mark, U+00A1 ISOnum -->
 <!ENTITY cent   "&#162;" ><!-- cent sign, U+00A2 ISOnum -->
 <!ENTITY pound  "&#163;" ><!-- pound sign, U+00A3 ISOnum -->
 <!ENTITY curren "&#164;" ><!-- currency sign, U+00A4 ISOnum -->
 <!ENTITY yen    "&#165;" ><!-- yen sign = yuan sign, U+00A5 ISOnum -->
 <!ENTITY brvbar "&#166;" ><!-- broken bar = broken vertical bar, U+00A6 ISOnum -->
 <!ENTITY sect   "&#167;" ><!-- section sign, U+00A7 ISOnum -->
 <!ENTITY uml    "&#168;" ><!-- diaeresis = spacing diaeresis, U+00A8 ISOdia -->
 <!ENTITY copy   "&#169;" ><!-- copyright sign, U+00A9 ISOnum -->
 <!ENTITY ordf   "&#170;" ><!-- feminine ordinal indicator, U+00AA ISOnum -->
 <!ENTITY laquo  "&#171;" ><!-- left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum -->
 <!ENTITY not    "&#172;" ><!-- not sign, U+00AC ISOnum -->
 <!ENTITY shy    "&#173;" ><!-- soft hyphen = discretionary hyphen, U+00AD ISOnum -->
 <!ENTITY reg    "&#174;" ><!-- registered sign = registered trade mark sign, U+00AE ISOnum -->
 <!ENTITY macr   "&#175;" ><!-- macron = spacing macron = overline = APL overbar, U+00AF ISOdia -->
 <!ENTITY deg    "&#176;" ><!-- degree sign, U+00B0 ISOnum -->
 <!ENTITY plusmn "&#177;" ><!-- plus-minus sign = plus-or-minus sign, U+00B1 ISOnum -->
 <!ENTITY sup2   "&#178;" ><!-- superscript two = superscript digit two = squared, U+00B2 ISOnum -->
 <!ENTITY sup3   "&#179;" ><!-- superscript three = superscript digit three = cubed, U+00B3 ISOnum -->
 <!ENTITY acute  "&#180;" ><!-- acute accent = spacing acute, U+00B4 ISOdia -->
 <!ENTITY micro  "&#181;" ><!-- micro sign, U+00B5 ISOnum -->
 <!ENTITY para   "&#182;" ><!-- pilcrow sign = paragraph sign, U+00B6 ISOnum -->
 <!ENTITY middot "&#183;" ><!-- middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum -->
 <!ENTITY cedil  "&#184;" ><!-- cedilla = spacing cedilla, U+00B8 ISOdia -->
 <!ENTITY sup1   "&#185;" ><!-- superscript one = superscript digit one, U+00B9 ISOnum -->
 <!ENTITY ordm   "&#186;" ><!-- masculine ordinal indicator, U+00BA ISOnum -->
 <!ENTITY raquo  "&#187;" ><!-- right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum -->
 <!ENTITY frac14 "&#188;" ><!-- vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum -->
 <!ENTITY frac12 "&#189;" ><!-- vulgar fraction one half = fraction one half, U+00BD ISOnum -->
 <!ENTITY frac34 "&#190;" ><!-- vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum -->
 <!ENTITY iquest "&#191;" ><!-- inverted question mark = turned question mark, U+00BF ISOnum -->
 <!ENTITY Agrave "&#192;" ><!-- latin capital A with grave = latin capital A grave, U+00C0 ISOlat1 -->
 <!ENTITY Aacute "&#193;" ><!-- latin capital A with acute, U+00C1 ISOlat1 -->
 <!ENTITY Acirc  "&#194;" ><!-- latin capital A with circumflex, U+00C2 ISOlat1 -->
 <!ENTITY Atilde "&#195;" ><!-- latin capital A with tilde, U+00C3 ISOlat1 -->
 <!ENTITY Auml   "&#196;" ><!-- latin capital A with diaeresis, U+00C4 ISOlat1 -->
 <!ENTITY Aring  "&#197;" ><!-- latin capital A with ring above = latin capital A ring, U+00C5 ISOlat1 -->
 <!ENTITY AElig  "&#198;" ><!-- latin capital AE = latin capital ligature AE, U+00C6 ISOlat1 -->
 <!ENTITY Ccedil "&#199;" ><!-- latin capital C with cedilla, U+00C7 ISOlat1 -->
 <!ENTITY Egrave "&#200;" ><!-- latin capital E with grave, U+00C8 ISOlat1 -->
 <!ENTITY Eacute "&#201;" ><!-- latin capital E with acute, U+00C9 ISOlat1 -->
 <!ENTITY Ecirc  "&#202;" ><!-- latin capital E with circumflex, U+00CA ISOlat1 -->
 <!ENTITY Euml   "&#203;" ><!-- latin capital E with diaeresis, U+00CB ISOlat1 -->
 <!ENTITY Igrave "&#204;" ><!-- latin capital I with grave, U+00CC ISOlat1 -->
 <!ENTITY Iacute "&#205;" ><!-- latin capital I with acute, U+00CD ISOlat1 -->
 <!ENTITY Icirc  "&#206;" ><!-- latin capital I with circumflex, U+00CE ISOlat1 -->
 <!ENTITY Iuml   "&#207;" ><!-- latin capital I with diaeresis, U+00CF ISOlat1 -->
 <!ENTITY ETH    "&#208;" ><!-- latin capital ETH, U+00D0 ISOlat1 -->
 <!ENTITY Ntilde "&#209;" ><!-- latin capital N with tilde, U+00D1 ISOlat1 -->
 <!ENTITY Ograve "&#210;" ><!-- latin capital O with grave, U+00D2 ISOlat1 -->
 <!ENTITY Oacute "&#211;" ><!-- latin capital O with acute, U+00D3 ISOlat1 -->
 <!ENTITY Ocirc  "&#212;" ><!-- latin capital O with circumflex, U+00D4 ISOlat1 -->
 <!ENTITY Otilde "&#213;" ><!-- latin capital O with tilde, U+00D5 ISOlat1 -->
 <!ENTITY Ouml   "&#214;" ><!-- latin capital O with diaeresis, U+00D6 ISOlat1 -->
 <!ENTITY times  "&#215;" ><!-- multiplication sign, U+00D7 ISOnum -->
 <!ENTITY Oslash "&#216;" ><!-- latin capital O with stroke = latin capital O slash, U+00D8 ISOlat1 -->
 <!ENTITY Ugrave "&#217;" ><!-- latin capital U with grave, U+00D9 ISOlat1 -->
 <!ENTITY Uacute "&#218;" ><!-- latin capital U with acute, U+00DA ISOlat1 -->
 <!ENTITY Ucirc  "&#219;" ><!-- latin capital U with circumflex, U+00DB ISOlat1 -->
 <!ENTITY Uuml   "&#220;" ><!-- latin capital U with diaeresis, U+00DC ISOlat1 -->
 <!ENTITY Yacute "&#221;" ><!-- latin capital Y with acute, U+00DD ISOlat1 -->
 <!ENTITY THORN  "&#222;" ><!-- latin capital THORN, U+00DE ISOlat1 -->
 <!ENTITY szlig  "&#223;" ><!-- latin small sharp s = ess-zed, U+00DF ISOlat1 -->
 <!ENTITY agrave "&#224;" ><!-- latin small a with grave = latin small a grave, U+00E0 ISOlat1 -->
 <!ENTITY aacute "&#225;" ><!-- latin small a with acute, U+00E1 ISOlat1 -->
 <!ENTITY acirc  "&#226;" ><!-- latin small a with circumflex, U+00E2 ISOlat1 -->
 <!ENTITY atilde "&#227;" ><!-- latin small a with tilde, U+00E3 ISOlat1 -->
 <!ENTITY auml   "&#228;" ><!-- latin small a with diaeresis, U+00E4 ISOlat1 -->
 <!ENTITY aring  "&#229;" ><!-- latin small a with ring above = latin small a ring, U+00E5 ISOlat1 -->
 <!ENTITY aelig  "&#230;" ><!-- latin small ae = latin small ligature ae, U+00E6 ISOlat1 -->
 <!ENTITY ccedil "&#231;" ><!-- latin small c with cedilla, U+00E7 ISOlat1 -->
 <!ENTITY egrave "&#232;" ><!-- latin small e with grave, U+00E8 ISOlat1 -->
 <!ENTITY eacute "&#233;" ><!-- latin small e with acute, U+00E9 ISOlat1 -->
 <!ENTITY ecirc  "&#234;" ><!-- latin small e with circumflex, U+00EA ISOlat1 -->
 <!ENTITY euml   "&#235;" ><!-- latin small e with diaeresis, U+00EB ISOlat1 -->
 <!ENTITY igrave "&#236;" ><!-- latin small i with grave, U+00EC ISOlat1 -->
 <!ENTITY iacute "&#237;" ><!-- latin small i with acute, U+00ED ISOlat1 -->
 <!ENTITY icirc  "&#238;" ><!-- latin small i with circumflex, U+00EE ISOlat1 -->
 <!ENTITY iuml   "&#239;" ><!-- latin small i with diaeresis, U+00EF ISOlat1 -->
 <!ENTITY eth    "&#240;" ><!-- latin small eth, U+00F0 ISOlat1 -->
 <!ENTITY ntilde "&#241;" ><!-- latin small n with tilde, U+00F1 ISOlat1 -->
 <!ENTITY ograve "&#242;" ><!-- latin small o with grave, U+00F2 ISOlat1 -->
 <!ENTITY oacute "&#243;" ><!-- latin small o with acute, U+00F3 ISOlat1 -->
 <!ENTITY ocirc  "&#244;" ><!-- latin small o with circumflex, U+00F4 ISOlat1 -->
 <!ENTITY otilde "&#245;" ><!-- latin small o with tilde, U+00F5 ISOlat1 -->
 <!ENTITY ouml   "&#246;" ><!-- latin small o with diaeresis, U+00F6 ISOlat1 -->
 <!ENTITY divide "&#247;" ><!-- division sign, U+00F7 ISOnum -->
 <!ENTITY oslash "&#248;" ><!-- latin small o with stroke, = latin small o slash, U+00F8 ISOlat1 -->
 <!ENTITY ugrave "&#249;" ><!-- latin small u with grave, U+00F9 ISOlat1 -->
 <!ENTITY uacute "&#250;" ><!-- latin small u with acute, U+00FA ISOlat1 -->
 <!ENTITY ucirc  "&#251;" ><!-- latin small u with circumflex, U+00FB ISOlat1 -->
 <!ENTITY uuml   "&#252;" ><!-- latin small u with diaeresis, U+00FC ISOlat1 -->
 <!ENTITY yacute "&#253;" ><!-- latin small y with acute, U+00FD ISOlat1 -->
 <!ENTITY thorn  "&#254;" ><!-- latin small thorn with, U+00FE ISOlat1 -->
 <!ENTITY yuml   "&#255;" ><!-- latin small y with diaeresis, U+00FF ISOlat1 -->
 <!-- end of xhtml-lat1.ent -->
 F.1.2. XHTML Special Characters
 You can download this version of this file from http://www.w3.org/TR/2010/REC-xhtml-modularization/DTD/xhtml-special.ent. The latest version is available at http://www.w3.org/MarkUp/DTD/xhtml-special.ent.
 <!-- ...................................................................... -->
 <!-- XML-compatible ISO Special Character Entity Set for XHTML ............ -->
 <!-- file: xhtml-special.ent
     Typical invocation:
       <!ENTITY % xhtml-special
           PUBLIC "-//W3C//ENTITIES Special for XHTML//EN"
                  "xhtml-special.ent" >
       %xhtml-special;
     This DTD module is identified by the PUBLIC and SYSTEM identifiers:
       PUBLIC "-//W3C//ENTITIES Special for XHTML//EN"
       SYSTEM "http://www.w3.org/MarkUp/DTD/xhtml-special.ent"
     Revision:  $Id: xhtml-special.ent,v 4.1 2001/04/10 09:34:14 altheim Exp $ SMI
     Portions (C) International Organization for Standardization 1986:
     Permission to copy in any form is granted for use with conforming
     SGML systems and applications as defined in ISO 8879, provided
     this notice is included in all copies.
     Revisions:
 2000-10-28: added &apos; and altered XML Predefined Entities for compatibility
 -->
 <!-- Relevant ISO entity set is given unless names are newly introduced.
     New names (i.e., not in ISO 8879 [SGML] list) do not clash with
     any existing ISO 8879 entity names. ISO 10646 [ISO10646] character
     numbers are given for each character, in hex. Entity values are
     decimal conversions of the ISO 10646 values and refer to the
     document character set. Names are Unicode [UNICODE] names.
 -->
 <!-- C0 Controls and Basic Latin -->
 <!ENTITY lt      "&#38;#60;" ><!-- less-than sign, U+003C ISOnum -->
 <!ENTITY gt      "&#62;" ><!-- greater-than sign, U+003E ISOnum -->
 <!ENTITY amp     "&#38;#38;" ><!-- ampersand, U+0026 ISOnum -->
 <!ENTITY apos    "&#39;" ><!-- The Apostrophe (Apostrophe Quote, APL Quote), U+0027 ISOnum -->
 <!ENTITY quot    "&#34;" ><!-- quotation mark (Quote Double), U+0022 ISOnum -->
 <!-- Latin Extended-A -->
 <!ENTITY OElig   "&#338;" ><!-- latin capital ligature OE, U+0152 ISOlat2 -->
 <!ENTITY oelig   "&#339;" ><!-- latin small ligature oe, U+0153 ISOlat2 -->
 <!-- ligature is a misnomer, this is a separate character in some languages -->
 <!ENTITY Scaron  "&#352;" ><!-- latin capital letter S with caron, U+0160 ISOlat2 -->
 <!ENTITY scaron  "&#353;" ><!-- latin small letter s with caron, U+0161 ISOlat2 -->
 <!ENTITY Yuml    "&#376;" ><!-- latin capital letter Y with diaeresis, U+0178 ISOlat2 -->
 <!-- Spacing Modifier Letters -->
 <!ENTITY circ    "&#710;" ><!-- modifier letter circumflex accent, U+02C6 ISOpub -->
 <!ENTITY tilde   "&#732;" ><!-- small tilde, U+02DC ISOdia -->
 <!-- General Punctuation -->
 <!ENTITY ensp    "&#8194;" ><!-- en space, U+2002 ISOpub -->
 <!ENTITY emsp    "&#8195;" ><!-- em space, U+2003 ISOpub -->
 <!ENTITY thinsp  "&#8201;" ><!-- thin space, U+2009 ISOpub -->
 <!ENTITY zwnj    "&#8204;" ><!-- zero width non-joiner, U+200C NEW RFC 2070 -->
 <!ENTITY zwj     "&#8205;" ><!-- zero width joiner, U+200D NEW RFC 2070 -->
 <!ENTITY lrm     "&#8206;" ><!-- left-to-right mark, U+200E NEW RFC 2070 -->
 <!ENTITY rlm     "&#8207;" ><!-- right-to-left mark, U+200F NEW RFC 2070 -->
 <!ENTITY ndash   "&#8211;" ><!-- en dash, U+2013 ISOpub -->
 <!ENTITY mdash   "&#8212;" ><!-- em dash, U+2014 ISOpub -->
 <!ENTITY lsquo   "&#8216;" ><!-- left single quotation mark, U+2018 ISOnum -->
 <!ENTITY rsquo   "&#8217;" ><!-- right single quotation mark, U+2019 ISOnum -->
 <!ENTITY sbquo   "&#8218;" ><!-- single low-9 quotation mark, U+201A NEW -->
 <!ENTITY ldquo   "&#8220;" ><!-- left double quotation mark, U+201C ISOnum -->
 <!ENTITY rdquo   "&#8221;" ><!-- right double quotation mark, U+201D ISOnum -->
 <!ENTITY bdquo   "&#8222;" ><!-- double low-9 quotation mark, U+201E NEW -->
 <!ENTITY dagger  "&#8224;" ><!-- dagger, U+2020 ISOpub -->
 <!ENTITY Dagger  "&#8225;" ><!-- double dagger, U+2021 ISOpub -->
 <!ENTITY permil  "&#8240;" ><!-- per mille sign, U+2030 ISOtech -->
 <!-- lsaquo is proposed but not yet ISO standardized -->
 <!ENTITY lsaquo  "&#8249;" ><!-- single left-pointing angle quotation mark, U+2039 ISO proposed -->
 <!-- rsaquo is proposed but not yet ISO standardized -->
 <!ENTITY rsaquo  "&#8250;" ><!-- single right-pointing angle quotation mark, U+203A ISO proposed -->
 <!ENTITY euro    "&#8364;" ><!-- euro sign, U+20AC NEW -->
 <!-- end of xhtml-special.ent -->
 F.1.3. XHTML Mathematical, Greek, and Symbolic Characters
 You can download this version of this file from http://www.w3.org/TR/2010/REC-xhtml-modularization/DTD/xhtml-symbol.ent. The latest version is available at http://www.w3.org/MarkUp/DTD/xhtml-symbol.ent.
 <!-- ...................................................................... -->
 <!-- ISO Math, Greek and Symbolic Character Entity Set for XHTML .......... -->
 <!-- file: xhtml-symbol.ent
     Typical invocation:
       <!ENTITY % xhtml-symbol
           PUBLIC "-//W3C//ENTITIES Symbols for XHTML//EN"
                  "xhtml-symbol.ent" >
       %xhtml-symbol;
     This DTD module is identified by the PUBLIC and SYSTEM identifiers:
       PUBLIC "-//W3C//ENTITIES Symbols for XHTML//EN"
       SYSTEM "http://www.w3.org/MarkUp/DTD/xhtml-symbol.ent"
     Revision:  $Id: xhtml-symbol.ent,v 4.1 2001/04/10 09:34:14 altheim Exp $ SMI
     Portions (C) International Organization for Standardization 1986:
     Permission to copy in any form is granted for use with conforming
     SGML systems and applications as defined in ISO 8879, provided
     this notice is included in all copies.
 -->
 <!-- Relevant ISO entity set is given unless names are newly introduced.
     New names (i.e., not in ISO 8879 [SGML] list) do not clash with
     any existing ISO 8879 entity names. ISO 10646 [ISO10646] character
     numbers are given for each character, in hex. Entity values are
     decimal conversions of the ISO 10646 values and refer to the
     document character set. Names are Unicode [UNICODE] names.
 -->
 <!-- Latin Extended-B -->
 <!ENTITY fnof     "&#402;" ><!-- latin small f with hook = function
                              = florin, U+0192 ISOtech -->
 <!-- Greek -->
 <!ENTITY Alpha    "&#913;" ><!-- greek capital letter alpha, U+0391 -->
 <!ENTITY Beta     "&#914;" ><!-- greek capital letter beta, U+0392 -->
 <!ENTITY Gamma    "&#915;" ><!-- greek capital letter gamma, U+0393 ISOgrk3 -->
 <!ENTITY Delta    "&#916;" ><!-- greek capital letter delta, U+0394 ISOgrk3 -->
 <!ENTITY Epsilon  "&#917;" ><!-- greek capital letter epsilon, U+0395 -->
 <!ENTITY Zeta     "&#918;" ><!-- greek capital letter zeta, U+0396 -->
 <!ENTITY Eta      "&#919;" ><!-- greek capital letter eta, U+0397 -->
 <!ENTITY Theta    "&#920;" ><!-- greek capital letter theta, U+0398 ISOgrk3 -->
 <!ENTITY Iota     "&#921;" ><!-- greek capital letter iota, U+0399 -->
 <!ENTITY Kappa    "&#922;" ><!-- greek capital letter kappa, U+039A -->
 <!ENTITY Lambda   "&#923;" ><!-- greek capital letter lambda, U+039B ISOgrk3 -->
 <!ENTITY Mu       "&#924;" ><!-- greek capital letter mu, U+039C -->
 <!ENTITY Nu       "&#925;" ><!-- greek capital letter nu, U+039D -->
 <!ENTITY Xi       "&#926;" ><!-- greek capital letter xi, U+039E ISOgrk3 -->
 <!ENTITY Omicron  "&#927;" ><!-- greek capital letter omicron, U+039F -->
 <!ENTITY Pi       "&#928;" ><!-- greek capital letter pi, U+03A0 ISOgrk3 -->
 <!ENTITY Rho      "&#929;" ><!-- greek capital letter rho, U+03A1 -->
 <!-- there is no Sigmaf, and no U+03A2 character either -->
 <!ENTITY Sigma    "&#931;" ><!-- greek capital letter sigma, U+03A3 ISOgrk3 -->
 <!ENTITY Tau      "&#932;" ><!-- greek capital letter tau, U+03A4 -->
 <!ENTITY Upsilon  "&#933;" ><!-- greek capital letter upsilon,
                              U+03A5 ISOgrk3 -->
 <!ENTITY Phi      "&#934;" ><!-- greek capital letter phi, U+03A6 ISOgrk3 -->
 <!ENTITY Chi      "&#935;" ><!-- greek capital letter chi, U+03A7 -->
 <!ENTITY Psi      "&#936;" ><!-- greek capital letter psi, U+03A8 ISOgrk3 -->
 <!ENTITY Omega    "&#937;" ><!-- greek capital letter omega, U+03A9 ISOgrk3 -->
 <!ENTITY alpha    "&#945;" ><!-- greek small letter alpha, U+03B1 ISOgrk3 -->
 <!ENTITY beta     "&#946;" ><!-- greek small letter beta, U+03B2 ISOgrk3 -->
 <!ENTITY gamma    "&#947;" ><!-- greek small letter gamma, U+03B3 ISOgrk3 -->
 <!ENTITY delta    "&#948;" ><!-- greek small letter delta, U+03B4 ISOgrk3 -->
 <!ENTITY epsilon  "&#949;" ><!-- greek small letter epsilon, U+03B5 ISOgrk3 -->
 <!ENTITY zeta     "&#950;" ><!-- greek small letter zeta, U+03B6 ISOgrk3 -->
 <!ENTITY eta      "&#951;" ><!-- greek small letter eta, U+03B7 ISOgrk3 -->
 <!ENTITY theta    "&#952;" ><!-- greek small letter theta, U+03B8 ISOgrk3 -->
 <!ENTITY iota     "&#953;" ><!-- greek small letter iota, U+03B9 ISOgrk3 -->
 <!ENTITY kappa    "&#954;" ><!-- greek small letter kappa, U+03BA ISOgrk3 -->
 <!ENTITY lambda   "&#955;" ><!-- greek small letter lambda, U+03BB ISOgrk3 -->
 <!ENTITY mu       "&#956;" ><!-- greek small letter mu, U+03BC ISOgrk3 -->
 <!ENTITY nu       "&#957;" ><!-- greek small letter nu, U+03BD ISOgrk3 -->
 <!ENTITY xi       "&#958;" ><!-- greek small letter xi, U+03BE ISOgrk3 -->
 <!ENTITY omicron  "&#959;" ><!-- greek small letter omicron, U+03BF NEW -->
 <!ENTITY pi       "&#960;" ><!-- greek small letter pi, U+03C0 ISOgrk3 -->
 <!ENTITY rho      "&#961;" ><!-- greek small letter rho, U+03C1 ISOgrk3 -->
 <!ENTITY sigmaf   "&#962;" ><!-- greek small letter final sigma, U+03C2 ISOgrk3 -->
 <!ENTITY sigma    "&#963;" ><!-- greek small letter sigma, U+03C3 ISOgrk3 -->
 <!ENTITY tau      "&#964;" ><!-- greek small letter tau, U+03C4 ISOgrk3 -->
 <!ENTITY upsilon  "&#965;" ><!-- greek small letter upsilon, U+03C5 ISOgrk3 -->
 <!ENTITY phi      "&#966;" ><!-- greek small letter phi, U+03C6 ISOgrk3 -->
 <!ENTITY chi      "&#967;" ><!-- greek small letter chi, U+03C7 ISOgrk3 -->
 <!ENTITY psi      "&#968;" ><!-- greek small letter psi, U+03C8 ISOgrk3 -->
 <!ENTITY omega    "&#969;" ><!-- greek small letter omega, U+03C9 ISOgrk3 -->
 <!ENTITY thetasym "&#977;" ><!-- greek small letter theta symbol, U+03D1 NEW -->
 <!ENTITY upsih    "&#978;" ><!-- greek upsilon with hook symbol, U+03D2 NEW -->
 <!ENTITY piv      "&#982;" ><!-- greek pi symbol, U+03D6 ISOgrk3 -->
 <!-- General Punctuation -->
 <!ENTITY bull     "&#8226;" ><!-- bullet = black small circle, U+2022 ISOpub  -->
 <!-- bullet is NOT the same as bullet operator, U+2219 -->
 <!ENTITY hellip   "&#8230;" ><!-- horizontal ellipsis = three dot leader, U+2026 ISOpub  -->
 <!ENTITY prime    "&#8242;" ><!-- prime = minutes = feet, U+2032 ISOtech -->
 <!ENTITY Prime    "&#8243;" ><!-- double prime = seconds = inches, U+2033 ISOtech -->
 <!ENTITY oline    "&#8254;" ><!-- overline = spacing overscore, U+203E NEW -->
 <!ENTITY frasl    "&#8260;" ><!-- fraction slash, U+2044 NEW -->
 <!-- Letterlike Symbols -->
 <!ENTITY weierp   "&#8472;" ><!-- script capital P = power set = Weierstrass p, U+2118 ISOamso -->
 <!ENTITY image    "&#8465;" ><!-- blackletter capital I = imaginary part, U+2111 ISOamso -->
 <!ENTITY real     "&#8476;" ><!-- blackletter capital R = real part symbol, U+211C ISOamso -->
 <!ENTITY trade    "&#8482;" ><!-- trade mark sign, U+2122 ISOnum -->
 <!ENTITY alefsym  "&#8501;" ><!-- alef symbol = first transfinite cardinal, U+2135 NEW -->
 <!-- alef symbol is NOT the same as hebrew letter alef, U+05D0 although
     the same glyph could be used to depict both characters -->
 <!-- Arrows -->
 <!ENTITY larr     "&#8592;" ><!-- leftwards arrow, U+2190 ISOnum -->
 <!ENTITY uarr     "&#8593;" ><!-- upwards arrow, U+2191 ISOnum-->
 <!ENTITY rarr     "&#8594;" ><!-- rightwards arrow, U+2192 ISOnum -->
 <!ENTITY darr     "&#8595;" ><!-- downwards arrow, U+2193 ISOnum -->
 <!ENTITY harr     "&#8596;" ><!-- left right arrow, U+2194 ISOamsa -->
 <!ENTITY crarr    "&#8629;" ><!-- downwards arrow with corner leftwards
                               = carriage return, U+21B5 NEW -->
 <!ENTITY lArr     "&#8656;" ><!-- leftwards double arrow, U+21D0 ISOtech -->
 <!-- Unicode does not say that lArr is the same as the 'is implied by' arrow
    but also does not have any other character for that function. So ? lArr can
    be used for 'is implied by' as ISOtech suggests -->
 <!ENTITY uArr     "&#8657;" ><!-- upwards double arrow, U+21D1 ISOamsa -->
 <!ENTITY rArr     "&#8658;" ><!-- rightwards double arrow, U+21D2 ISOtech -->
 <!-- Unicode does not say this is the 'implies' character but does not have
     another character with this function so ?
     rArr can be used for 'implies' as ISOtech suggests -->
 <!ENTITY dArr     "&#8659;" ><!-- downwards double arrow, U+21D3 ISOamsa -->
 <!ENTITY hArr     "&#8660;" ><!-- left right double arrow, U+21D4 ISOamsa -->
 <!-- Mathematical Operators -->
 <!ENTITY forall   "&#8704;" ><!-- for all, U+2200 ISOtech -->
 <!ENTITY part     "&#8706;" ><!-- partial differential, U+2202 ISOtech  -->
 <!ENTITY exist    "&#8707;" ><!-- there exists, U+2203 ISOtech -->
 <!ENTITY empty    "&#8709;" ><!-- empty set = null set, U+2205 ISOamso -->
 <!ENTITY nabla    "&#8711;" ><!-- nabla = backward difference, U+2207 ISOtech -->
 <!ENTITY isin     "&#8712;" ><!-- element of, U+2208 ISOtech -->
 <!ENTITY notin    "&#8713;" ><!-- not an element of, U+2209 ISOtech -->
 <!ENTITY ni       "&#8715;" ><!-- contains as member, U+220B ISOtech -->
 <!-- should there be a more memorable name than 'ni'? -->
 <!ENTITY prod     "&#8719;" ><!-- n-ary product = product sign, U+220F ISOamsb -->
 <!-- prod is NOT the same character as U+03A0 'greek capital letter pi' though
     the same glyph might be used for both -->
 <!ENTITY sum      "&#8721;" ><!-- n-ary sumation, U+2211 ISOamsb -->
 <!-- sum is NOT the same character as U+03A3 'greek capital letter sigma'
     though the same glyph might be used for both -->
 <!ENTITY minus    "&#8722;" ><!-- minus sign, U+2212 ISOtech -->
 <!ENTITY lowast   "&#8727;" ><!-- asterisk operator, U+2217 ISOtech -->
 <!ENTITY radic    "&#8730;" ><!-- square root = radical sign, U+221A ISOtech -->
 <!ENTITY prop     "&#8733;" ><!-- proportional to, U+221D ISOtech -->
 <!ENTITY infin    "&#8734;" ><!-- infinity, U+221E ISOtech -->
 <!ENTITY ang      "&#8736;" ><!-- angle, U+2220 ISOamso -->
 <!ENTITY and      "&#8743;" ><!-- logical and = wedge, U+2227 ISOtech -->
 <!ENTITY or       "&#8744;" ><!-- logical or = vee, U+2228 ISOtech -->
 <!ENTITY cap      "&#8745;" ><!-- intersection = cap, U+2229 ISOtech -->
 <!ENTITY cup      "&#8746;" ><!-- union = cup, U+222A ISOtech -->
 <!ENTITY int      "&#8747;" ><!-- integral, U+222B ISOtech -->
 <!ENTITY there4   "&#8756;" ><!-- therefore, U+2234 ISOtech -->
 <!ENTITY sim      "&#8764;" ><!-- tilde operator = varies with = similar to, U+223C ISOtech -->
 <!-- tilde operator is NOT the same character as the tilde, U+007E,
     although the same glyph might be used to represent both  -->
 <!ENTITY cong     "&#8773;" ><!-- approximately equal to, U+2245 ISOtech -->
 <!ENTITY asymp    "&#8776;" ><!-- almost equal to = asymptotic to, U+2248 ISOamsr -->
 <!ENTITY ne       "&#8800;" ><!-- not equal to, U+2260 ISOtech -->
 <!ENTITY equiv    "&#8801;" ><!-- identical to, U+2261 ISOtech -->
 <!ENTITY le       "&#8804;" ><!-- less-than or equal to, U+2264 ISOtech -->
 <!ENTITY ge       "&#8805;" ><!-- greater-than or equal to, U+2265 ISOtech -->
 <!ENTITY sub      "&#8834;" ><!-- subset of, U+2282 ISOtech -->
 <!ENTITY sup      "&#8835;" ><!-- superset of, U+2283 ISOtech -->
 <!-- note that nsup, 'not a superset of, U+2283' is not covered by the Symbol
     font encoding and is not included. Should it be, for symmetry?
     It is in ISOamsn  -->
 <!ENTITY nsub     "&#8836;" ><!-- not a subset of, U+2284 ISOamsn -->
 <!ENTITY sube     "&#8838;" ><!-- subset of or equal to, U+2286 ISOtech -->
 <!ENTITY supe     "&#8839;" ><!-- superset of or equal to, U+2287 ISOtech -->
 <!ENTITY oplus    "&#8853;" ><!-- circled plus = direct sum, U+2295 ISOamsb -->
 <!ENTITY otimes   "&#8855;" ><!-- circled times = vector product, U+2297 ISOamsb -->
 <!ENTITY perp     "&#8869;" ><!-- up tack = orthogonal to = perpendicular, U+22A5 ISOtech -->
 <!ENTITY sdot     "&#8901;" ><!-- dot operator, U+22C5 ISOamsb -->
 <!-- dot operator is NOT the same character as U+00B7 middle dot -->
 <!-- Miscellaneous Technical -->
 <!ENTITY lceil    "&#8968;" ><!-- left ceiling = apl upstile, U+2308 ISOamsc  -->
 <!ENTITY rceil    "&#8969;" ><!-- right ceiling, U+2309 ISOamsc  -->
 <!ENTITY lfloor   "&#8970;" ><!-- left floor = apl downstile, U+230A ISOamsc  -->
 <!ENTITY rfloor   "&#8971;" ><!-- right floor, U+230B ISOamsc  -->
 <!ENTITY lang     "&#9001;" ><!-- left-pointing angle bracket = bra, U+2329 ISOtech -->
 <!-- lang is NOT the same character as U+003C 'less than'
     or U+2039 'single left-pointing angle quotation mark' -->
 <!ENTITY rang     "&#9002;" ><!-- right-pointing angle bracket = ket, U+232A ISOtech -->
 <!-- rang is NOT the same character as U+003E 'greater than'
     or U+203A 'single right-pointing angle quotation mark' -->
 <!-- Geometric Shapes -->
 <!ENTITY loz      "&#9674;" ><!-- lozenge, U+25CA ISOpub -->
 <!-- Miscellaneous Symbols -->
 <!ENTITY spades   "&#9824;" ><!-- black spade suit, U+2660 ISOpub -->
 <!-- black here seems to mean filled as opposed to hollow -->
 <!ENTITY clubs    "&#9827;" ><!-- black club suit = shamrock, U+2663 ISOpub -->
 <!ENTITY hearts   "&#9829;" ><!-- black heart suit = valentine, U+2665 ISOpub -->
 <!ENTITY diams    "&#9830;" ><!-- black diamond suit, U+2666 ISOpub -->
 <!-- end of xhtml-symbol.ent -->
 """
 def get_apache_license():
  """Return the Apache License 2.0 header as a ``/** ... */`` block comment.
  The text is returned exactly as embedded below, including its trailing
  newline, so it can be printed at the top of the generated output.
  """
  return r"""/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 """
 # Run the generator at module level: executing (or importing) this file
 # immediately prints the generated output.
 main()
--- a/fine-lucene/src/main/java/com/fr/third/org/apache/lucene/util/automaton/UTF32ToUTF8.py
+++ b/fine-lucene/src/main/java/com/fr/third/org/apache/lucene/util/automaton/UTF32ToUTF8.py
@ -1,366 +0,0 @@
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
 # The ASF licenses this file to You under the Apache License, Version 2.0
 # (the "License"); you may not use this file except in compliance with
 # the License.  You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import types
 import os
 import sys
 import random
 # Upper bound asserted by toUTF8() and used as the sampling limit in test()/stress().
 MAX_UNICODE = 0x10FFFF
 # TODO
 #   - could be more minimal
 #     - eg when bracket lands on a utf8 boundary, like 3 - 2047 -- they can share the two * edges
 #     - also 3 2048 or 3 65536 -- it should not have an * down the red path, but it does
 # MASKS[0] is bottom 1-bit
 # MASKS[1] is bottom 2-bits
 # ...
 # Inclusive code point ranges that toUTF8() encodes with 1, 2, 3 and 4 bytes
 # respectively; build() indexes this list by byte count - 1.
 utf8Ranges = [(0, 127),
              (128, 2047),
              (2048, 65535),
              (65536, 1114111)]
 # Edge label -> Graphviz color used by FSA.toDOT(); labels not listed here
 # are drawn in 'black'.
 typeToColor = {'startend': 'purple',
               'start': 'blue',
               'end': 'red'}
 class FSA:
  """Byte-level finite state automaton assembled from labelled edges.
  self.states maps a node id to a list whose first item is the node label and
  whose remaining items are (edgeLabel, startByte, endByte, destNode) tuples.
  Callers assign self.start (and self.end) after creating the nodes.
  """
  def __init__(self):
    # maps fromNode -> [label, (edgeLabel, startUTF8, endUTF8, endNode), ...]
    self.states = {}
    self.nodeUpto = 0
  def run(self, bytes):
    """Feed a sequence of byte values through the automaton.
    Returns the node reached after the last byte, or -1 as soon as a byte has
    no matching edge. Raises RuntimeError if two edges of one node match the
    same byte.
    """
    state = self.start
    for b in bytes:
      found = False
      oldState = state
      for label, s, e, n in self.states[state][1:]:
        if b >= s and b <= e:
          if found:
            raise RuntimeError('state %s has ambiguous output for byte %s' % (oldState, b))
          state = n
          found = True
      if not found:
        return -1
    return state
  def addEdge(self, n1, n2, v1, v2, label):
    """
    Adds edge from n1-n2, utf8 byte range v1-v2.
    """
    assert n1 in self.states
    assert isinstance(v1, int)
    assert isinstance(v2, int)
    self.states[n1].append((label, v1, v2, n2))
  def addNode(self, label=None):
    """Create a node with the given label and return its id."""
    node = self.nodeUpto
    self.states[node] = [label]
    self.nodeUpto += 1
    return node
  def toDOT(self, label):
    """Return Graphviz DOT source for this automaton as a digraph named label."""
    lines = []
    w = lines.append
    endNode = startNode = None
    for nodeId, details in self.states.items():
      name = details[0]
      if name == 'end':
        endNode = nodeId
      elif name == 'start':
        startNode = nodeId
    w('digraph %s {' % label)
    w('  rankdir=LR;')
    w('  size="8,5";')
    w('  node [color=white label=""]; Ns;')
    w('  node [color=black];')
    w('  node [shape=doublecircle, label=""]; N%s [label="%s"];' % (endNode, endNode))
    w('  node [shape=circle];')
    w('  N%s [label="%s"];' % (startNode, startNode))
    w('  Ns -> N%s;' % startNode)
    for nodeId, details in self.states.items():
      w('  N%s [label="%s"];' % (nodeId, nodeId))
      for edgeType, s, e, dest in details[1:]:
        c = typeToColor.get(edgeType, 'black')
        if edgeType == 'all*':
          # special case -- matches any utf8 byte at this point
          edgeLabel = '*'
        elif s == e:
          edgeLabel = '%s' % binary(s)
        else:
          edgeLabel = '%s-%s' % (binary(s), binary(e))
        w('  N%s -> N%s [label="%s" color="%s"];' % (nodeId, dest, edgeLabel, c))
    w('}')
    return '\n'.join(lines)
  def toPNG(self, label, pngOut):
    """Write the DOT source to tmp.dot and render it to pngOut with `dot`.
    Raises RuntimeError if the dot command exits with a non-zero status.
    """
    # Close the file before dot reads it.
    with open('tmp.dot', 'wb') as dotFile:
      dotFile.write(self.toDOT(label))
    if os.system('dot -Tpng tmp.dot -o %s' % pngOut):
      raise RuntimeError('dot failed')
 # MASKS[i] has the low (i + 1) bits set: MASKS[0] == 1, MASKS[1] == 3, ...
 # The loop leaves v and i behind as module-level names.
 MASKS = []
 v = 2
 for i in range(32):
  MASKS.append(v-1)
  v *= 2
 def binary(x):
  """Format x as space-separated groups of 8 binary digits.
  Groups are emitted lowest byte first and the top group is zero-padded to 8
  digits. Zero yields '00000000'; a negative x yields ''.
  """
  if x == 0:
    return '00000000'
  digits = ''
  while x > 0:
    digits = '01'[x & 1] + digits
    x >>= 1
  groups = []
  while digits:
    # peel off the low 8 bits each round
    groups.append(digits[-8:].rjust(8, '0'))
    digits = digits[:-8]
  return ' '.join(groups)
 def getUTF8Rest(code, numBytes):
  """Return numBytes (byteValue, 6) pairs built from the low bits of code.
  Each byte is 128 OR'ed with six bits of code; the pair for the highest bits
  comes first.
  """
  low6 = MASKS[5]
  rest = ()
  for _ in range(numBytes):
    # prepend, so the byte taken last (highest bits) ends up first
    rest = ((128 | (code & low6), 6),) + rest
    code >>= 6
  return rest
 def toUTF8(code):
  """Encode a Unicode code point as a tuple of (byteValue, payloadBits) pairs.
  payloadBits is the number of code point bits carried by that byte: 7 for a
  single-byte encoding, 5/4/3 for the lead byte of a 2/3/4-byte encoding and
  6 for every continuation byte. Asserts 0 <= code <= MAX_UNICODE.
  """
  # code = Unicode code point
  assert code >= 0
  assert code <= MAX_UNICODE
  if code < 128:
    # 0xxxxxxx
    bytes = ((code, 7),)
  elif code < 2048:
    # 110yyyxx 10xxxxxx
    byte1 = (6 << 5) | (code >> 6)
    bytes = ((byte1, 5),) + getUTF8Rest(code, 1)
  elif code < 65536:
    # 1110yyyy 10yyyyxx 10xxxxxx
    byte1 = (14 << 4) | (code >> 12)
    bytes = ((byte1, 4),) + getUTF8Rest(code, 2)
  else:
    # 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
    byte1 = (30 << 3) | (code >> 18)
    bytes = ((byte1, 3),) + getUTF8Rest(code, 3)
  return bytes
 def all(fsa, startNode, endNode, startCode, endCode, left):
  """Add a path from startNode to endNode for lead bytes startCode-endCode.
  The lead-byte edge is followed by one 'all*' edge (byte range 128-191) per
  item in left; with an empty left the lead-byte edge goes straight to endNode.
  """
  if len(left) == 0:
    fsa.addEdge(startNode, endNode, startCode, endCode, 'all')
    return
  current = fsa.addNode()
  fsa.addEdge(startNode, current, startCode, endCode, 'all')
  # one intermediate node per remaining byte except the last
  for _ in range(len(left) - 1):
    following = fsa.addNode()
    fsa.addEdge(current, following, 128, 191, 'all*')
    current = following
  fsa.addEdge(current, endNode, 128, 191, 'all*')
 def start(fsa, startNode, endNode, utf8, doAll):
  """Add the 'start' edges for the encoded lower bound utf8.
  A single remaining byte becomes one edge from that byte up to the byte with
  all of its payload bits set. Otherwise the lead byte is matched exactly and
  the rest is handled recursively; when doAll is set, larger lead bytes with
  the same payload width are covered through all().
  """
  lead, payloadBits = utf8[0][0], utf8[0][1]
  if len(utf8) == 1:
    fsa.addEdge(startNode, endNode, lead, lead | MASKS[payloadBits-1], 'start')
    return
  nextNode = fsa.addNode()
  fsa.addEdge(startNode, nextNode, lead, lead, 'start')
  tail = utf8[1:]
  start(fsa, nextNode, endNode, tail, True)
  if doAll:
    highest = lead | MASKS[payloadBits-1]
    if lead != highest:
      all(fsa, startNode, endNode, lead+1, highest, tail)
 def end(fsa, startNode, endNode, utf8, doAll):
  """Add the 'end' edges for the encoded upper bound utf8.
  A single remaining byte becomes one edge from the byte with its payload bits
  cleared up to that byte. Otherwise, when doAll is set, smaller lead bytes
  are covered through all(), then the lead byte is matched exactly and the
  rest is handled recursively.
  """
  lead, payloadBits = utf8[0][0], utf8[0][1]
  if len(utf8) == 1:
    fsa.addEdge(startNode, endNode, lead & ~MASKS[payloadBits-1], lead, 'end')
    return
  # special case -- avoid created unused edges (utf8 doesn't accept certain byte sequences):
  lowest = 194 if payloadBits == 5 else lead & (~MASKS[payloadBits-1])
  tail = utf8[1:]
  if doAll and lead != lowest:
    all(fsa, startNode, endNode, lowest, lead-1, tail)
  nextNode = fsa.addNode()
  fsa.addEdge(startNode, nextNode, lead, lead, 'end')
  end(fsa, nextNode, endNode, tail, True)
 def build(fsa, startNode, endNode, startUTF8, endUTF8):
  """Add edges between startNode and endNode for the range startUTF8..endUTF8.
  Both bounds are tuples as produced by toUTF8(). The work is split into a
  'start' part, optional 'all' middle parts and an 'end' part.
  """
  firstLo = startUTF8[0][0]
  firstHi = endUTF8[0][0]
  loLen = len(startUTF8)
  hiLen = len(endUTF8)
  if firstLo == firstHi:
    # Degen case: lead with the same byte:
    if loLen == 1 and hiLen == 1:
      fsa.addEdge(startNode, endNode, firstLo, firstHi, 'startend')
      return
    assert loLen != 1
    assert hiLen != 1
    shared = fsa.addNode()
    # single value edge
    fsa.addEdge(startNode, shared, firstLo, firstLo, 'single')
    build(fsa, shared, endNode, startUTF8[1:], endUTF8[1:])
    return
  if loLen == hiLen:
    if loLen == 1:
      fsa.addEdge(startNode, endNode, firstLo, firstHi, 'startend')
      return
    start(fsa, startNode, endNode, startUTF8, False)
    if firstHi - firstLo > 1:
      all(fsa, startNode, endNode, firstLo+1, firstHi-1, startUTF8[1:])
    end(fsa, startNode, endNode, endUTF8, False)
    return
  # start
  start(fsa, startNode, endNode, startUTF8, True)
  # possibly middle: every encoding length strictly between the two bounds
  for byteCount in range(1 + loLen, hiLen):
    lowCode, highCode = utf8Ranges[byteCount-1]
    lowBytes = toUTF8(lowCode)
    highBytes = toUTF8(highCode)
    all(fsa, startNode, endNode, lowBytes[0][0], highBytes[0][0], lowBytes[1:])
  # end
  end(fsa, startNode, endNode, endUTF8, True)
 def main():
  if len(sys.argv) not in (3, 4):
    print
    print 'Usage: python %s startUTF32 endUTF32 [testCode]' % sys.argv[0]
    print
    sys.exit(1)
  utf32Start = int(sys.argv[1])
  utf32End = int(sys.argv[2])
  if utf32Start > utf32End:
    print 'ERROR: start must be <= end'
    sys.exit(1)
  fsa = FSA()
  fsa.start = fsa.addNode('start')
  fsa.end = fsa.addNode('end')
  print 's=%s' % ' '.join([binary(x[0]) for x in toUTF8(utf32Start)])
  print 'e=%s' % ' '.join([binary(x[0]) for x in toUTF8(utf32End)])
  if len(sys.argv) == 4:
    print 't=%s [%s]' % \
          (' '.join([binary(x[0]) for x in toUTF8(int(sys.argv[3]))]),
           ' '.join(['%2x' % x[0] for x in toUTF8(int(sys.argv[3]))]))
  build(fsa, fsa.start, fsa.end,
        toUTF8(utf32Start),
        toUTF8(utf32End))
  fsa.toPNG('test', '/tmp/outpy.png')
  print 'Saved to /tmp/outpy.png...'
  test(fsa, utf32Start, utf32End, 100000);
 def test(fsa, utf32Start, utf32End, count):
  # verify correct ints are accepted
  for i in range(count):
    r = random.randint(utf32Start, utf32End)
    dest = fsa.run([tup[0] for tup in toUTF8(r)])
    if dest != fsa.end:
      print 'FAILED: valid %s (%s) is not accepted' % (r, ' '.join([binary(x[0]) for x in toUTF8(r)]))
      return False
  invalidRange = MAX_UNICODE - (utf32End - utf32Start + 1)
  if invalidRange >= 0:
    # verify invalid ints are not accepted
    for i in range(count):
      r = random.randint(0, invalidRange-1)
      if r >= utf32Start:
        r = utf32End + 1 + r - utf32Start
      dest = fsa.run([tup[0] for tup in toUTF8(r)])
      if dest != -1:
        print 'FAILED: invalid %s (%s) is accepted' % (r, ' '.join([binary(x[0]) for x in toUTF8(r)]))
        return False
  return True
 def stress():
  print 'Testing...'
  iter = 0
  while True:
    if iter % 10 == 0:
      print '%s...' % iter
    iter += 1
    v1 = random.randint(0, MAX_UNICODE)
    v2 = random.randint(0, MAX_UNICODE)
    if v2 < v1:
      v1, v2 = v2, v1
    utf32Start = v1
    utf32End = v2
    fsa = FSA()
    fsa.start = fsa.addNode('start')
    fsa.end = fsa.addNode('end')
    build(fsa, fsa.start, fsa.end,
          toUTF8(utf32Start),
          toUTF8(utf32End))
    if not test(fsa, utf32Start, utf32End, 10000):
      print 'FAILED on utf32Start=%s utf32End=%s' % (utf32Start, utf32End)
 if __name__ == '__main__':
  # With command-line arguments run a single range; without, run the stress loop.
  (main if len(sys.argv) > 1 else stress)()
--- a/fine-lucene/src/main/java/com/fr/third/org/apache/lucene/util/automaton/createLevAutomata.py
+++ b/fine-lucene/src/main/java/com/fr/third/org/apache/lucene/util/automaton/createLevAutomata.py
@ -1,500 +0,0 @@
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
 # The ASF licenses this file to You under the Apache License, Version 2.0
 # (the "License"); you may not use this file except in compliance with
 # the License.  You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Note, this file is known to work with rev 120 of the moman
 # repository (http://bitbucket.org/jpbarrette/moman/overview)
 #
 # See also: http://sites.google.com/site/rrettesite/moman
 import math
 import os
 import sys
 #sys.path.insert(0, 'moman/finenight/python')
 sys.path.insert(0, '../../../../../../../../build/core/moman/finenight/python')
 try:
  from possibleStates import genTransitions
 except ImportError:
  from finenight.possibleStates import genTransitions
 # Code-generation mode checked throughout main(): 'array' emits lookup tables,
 # 'switch' emits nested switch statements.
 MODE = 'array'
 # When true, the 'array' tables are bit-packed into long[] literals via pack().
 PACKED = True
 # Number of bits in one packed word.
 WORD = 64
 # log2(WORD); unpack() shifts a bit offset right by this to get the word index.
 LOG2_WORD = int(math.log(WORD)/math.log(2))
 #MODE = 'switch'
 class LineOutput:
  """Collects generated source lines and indents them by brace nesting.
  Calling the instance appends one line. A line containing '}' is outdented
  before it is added and a line containing '{' indents the lines after it.
  Lines inside a /* ... */ comment keep their leading whitespace; all other
  lines are left-stripped first.
  """
  def __init__(self, indent=''):
    self.l = []
    self._indent = self.startIndent = indent
    self.inComment = False
  def __call__(self, s, indent=0):
    """Append line s; a non-zero indent adds that many extra indent levels."""
    if '}' in s:
      assert self._indent != self.startIndent
      self._indent = self._indent[:-2]
    if indent != 0:
      # Floor division keeps the repeat count an int under true division too.
      indent0 = '  ' * (len(self._indent)//2+indent)
    else:
      indent0 = self._indent
    closesComment = '*/' in s
    if '/*' in s:
      if not closesComment:
        self.inComment = True
    elif closesComment:
      # keep the closing line's own leading whitespace
      self.inComment = True
    if self.inComment:
      self.l.append(indent0 + s)
    else:
      self.l.append(indent0 + s.lstrip())
    self.inComment = self.inComment and not closesComment
    if '{' in s:
      self._indent += '  '
  def __str__(self):
    """Return all lines joined by newlines; asserts that every brace was closed."""
    assert self._indent == self.startIndent, 'indent %d vs start indent %d' % \
           (len(self._indent), len(self.startIndent))
    return '\n'.join(self.l)
  def indent(self):
    """Indent subsequent lines by one level."""
    self._indent += '  '
  def outdent(self):
    """Undo one indent level; asserts there is one to undo."""
    assert self._indent != self.startIndent
    self._indent = self._indent[:-2]
 def charVarNumber(charVar):
  """
  Maps binary number (eg [1, 0, 1]) to its decimal value (5).
  The first item is the most significant digit; an empty sequence gives 0.
  """
  value = 0
  for digit in charVar:
    # shift what we have so far and add the next digit
    value = value * 2 + int(digit)
  return value
 def main():
  if len(sys.argv) != 3:
    print
    print 'Usage: python -u %s N <True/False>' % sys.argv[0]
    print
    print 'NOTE: the resulting .java file is created in the current working dir!'
    print
    sys.exit(1)
  n = int(sys.argv[1])
  transpose = (sys.argv[2] == "True")
  tables = genTransitions(n, transpose)
  stateMap = {}
  # init null state
  stateMap['[]'] = -1
  # init start state
  stateMap['[(0, 0)]'] = 0
  w = LineOutput()
  w('package com.fr.third.org.apache.lucene.util.automaton;')
  w('')
  w('/*')
  w(' * Licensed to the Apache Software Foundation (ASF) under one or more')
  w(' * contributor license agreements.  See the NOTICE file distributed with')
  w(' * this work for additional information regarding copyright ownership.')
  w(' * The ASF licenses this file to You under the Apache License, Version 2.0')
  w(' * (the "License"); you may not use this file except in compliance with')
  w(' * the License.  You may obtain a copy of the License at')
  w(' *')
  w(' *     http://www.apache.org/licenses/LICENSE-2.0')
  w(' *')
  w(' * Unless required by applicable law or agreed to in writing, software')
  w(' * distributed under the License is distributed on an "AS IS" BASIS,')
  w(' * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.')
  w(' * See the License for the specific language governing permissions and')
  w(' * limitations under the License.')
  w(' */')
  w('')
  w('// The following code was generated with the moman/finenight pkg')
  w('// This package is available under the MIT License, see NOTICE.txt')
  w('// for more details.')
  w('')
  w('import com.fr.third.org.apache.lucene.util.automaton.LevenshteinAutomata.ParametricDescription;')
  w('')
  if transpose:
    w('/** Parametric description for generating a Levenshtein automaton of degree %s, ' % n)
    w('    with transpositions as primitive edits */')
    className = 'Lev%dTParametricDescription' % n
  else:
    w('/** Parametric description for generating a Levenshtein automaton of degree %s */' % n)
    className = 'Lev%dParametricDescription' % n
  w('class %s extends ParametricDescription {' % className)
  w('')
  w('@Override')
  w('int transition(int absState, int position, int vector) {')
  w('  // null absState should never be passed in')
  w('  assert absState != -1;')
  w('')
  w('  // decode absState -> state, offset')
  w('  int state = absState/(w+1);')
  w('  int offset = absState%(w+1);')
  w('  assert offset >= 0;')
  w('')  
  machines = []
  for i, map in enumerate(tables):
    if i == 0:
      w('if (position == w) {')
    elif i == len(tables)-1:
      w('} else {')
    else:
      w('} else if (position == w-%d) {' % i)
    if i != 0 and MODE == 'switch':
      w('switch(vector) {')
    l = map.items()
    l.sort()
    numCasesPerVector = None
    numVectors = len(l)
    if MODE == 'array':
      toStateArray = []
      toOffsetIncrArray = []
    for charVar, states in l:
      # somehow it's a string:
      charVar = eval(charVar)
      if i != 0 and MODE == 'switch':
        w('case %s: // <%s>' % (charVarNumber(charVar), ','.join([str(x) for x in charVar])))
        w.indent()
      l = states.items()
      byFromState = {}
      # first pass to assign states
      byAction = {}
      for s, (toS, offset) in l:
        state = str(s)
        toState = str(toS)
        if state not in stateMap:
          stateMap[state] = len(stateMap)-1
        if toState not in stateMap:
          stateMap[toState] = len(stateMap)-1
        byFromState[stateMap[state]] = (1+stateMap[toState], offset)
        fromStateDesc = s[1:len(s)-1]
        toStateDesc = ', '.join([str(x) for x in toS])   
        tup = (stateMap[toState], toStateDesc, offset)
        if tup not in byAction:
          byAction[tup] = []
        byAction[tup].append((fromStateDesc, stateMap[state]))
      if numCasesPerVector is None:
        numCasesPerVector = len(l)
      else:
        # we require this to be uniform... empirically it seems to be!
        assert numCasesPerVector == len(l)
      if MODE == 'array':
        for s in range(numCasesPerVector):
          toState, offsetIncr = byFromState[s]
          toStateArray.append(toState)
          toOffsetIncrArray.append(offsetIncr)
      else:
        # render switches
        w('switch(state) {   // %s cases' % len(l))
        for (toState, toStateDesc, offset), lx in byAction.items():
          for fromStateDesc, fromState in lx:
            w('case %s: // %s' % (fromState, fromStateDesc))
          w.indent()
          w('  state = %s; // %s' % (toState, toStateDesc))
          if offset > 0:
            w('  offset += %s;' % offset)
          w('break;')
          w.outdent()
        w('}')
        if i != 0:
          w('break;')
          w.outdent()
    if MODE == 'array':
      # strangely state can come in wildly out of bounds....
      w('  if (state < %d) {' % numCasesPerVector)
      w('    final int loc = vector * %d + state;' % numCasesPerVector)
      if PACKED:
        w('    offset += unpack(offsetIncrs%d, loc, NBITSOFFSET%d);' % (i, i))
        w('    state = unpack(toStates%d, loc, NBITSSTATES%d)-1;' % (i, i))
      else:
        w('    offset += offsetIncrs%d[loc];' % i)
        w('    state = toStates%d[loc]-1;' % i)
      w('  }')
    elif i != 0:
      w('}')
    machines.append((toStateArray, toOffsetIncrArray, numCasesPerVector, numVectors))
  # ends switch statement for machine
  w('}')
  w('')
  w('  if (state == -1) {')
  w('    // null state')
  w('    return -1;')
  w('  } else {')
  w('    // translate back to abs')
  w('    return state*(w+1)+offset;')
  w('  }')
  # ends transition method
  w('}')
  subs = []
  if MODE == 'array':
    w.indent()
    for i, (toStateArray, toOffsetIncrsArray, numCasesPerVector, numVectors) in enumerate(machines):
      w('')
      w.outdent()
      w('// %d vectors; %d states per vector; array length = %d' % \
        (numVectors, numCasesPerVector, numVectors*numCasesPerVector))
      w.indent()
      if PACKED:
        # pack in python
        l, nbits = pack(toStateArray)
        subs.append(('NBITSSTATES%d' % i, str(nbits)))
        w('  private final static long[] toStates%d = new long[] /*%d bits per value */ %s;' % \
          (i, nbits, renderList([hex(long(x)) for x in l])))
        l, nbits = pack(toOffsetIncrsArray)
        subs.append(('NBITSOFFSET%d' % i, str(nbits)))
        w('  private final static long[] offsetIncrs%d = new long[] /*%d bits per value */ %s;' % \
          (i, nbits, renderList([hex(long(x)) for x in l])))
      else:
        w('  private final static int[] toStates%d = new int[] %s;' % \
          (i, renderList([str(x) for x in toStateArray])))
        w('  private final static int[] offsetIncrs%d = new int[] %s;' % \
          (i, renderList([str(x) for x in toStateArray])))
    w.outdent()
  stateMap2 = dict([[v,k] for k,v in stateMap.items()])
  w('')
  w('// state map')
  sum = 0
  minErrors = []
  for i in xrange(len(stateMap2)-1):
    w('//   %s -> %s' % (i, stateMap2[i]))
    # we replace t-notation as its not relevant here
    st = stateMap2[i].replace('t', '')
    v = eval(st)
    minError = min([-i+e for i, e in v])
    c = len(v)
    sum += c
    minErrors.append(minError)
  w('')
  w.indent()
  #w('private final static int[] minErrors = new int[] {%s};' % ','.join([str(x) for x in minErrors]))
  w.outdent()
  w('')
  w('  public %s(int w) {' % className)
  w('    super(w, %d, new int[] {%s});' % (n, ','.join([str(x) for x in minErrors])), indent=1)
  w('  }')
  if 0:
    w('')
    w('@Override')
    w('public int size() { // this can now move up?')
    w('  return %d*(w+1);' % (len(stateMap2)-1))
    w('}')
    w('')
    w('@Override')
    w('public int getPosition(int absState) { // this can now move up?')
    w('  return absState % (w+1);')
    w('}')
    w('')
    w('@Override')
    w('public boolean isAccept(int absState) { // this can now move up?')
    w('  // decode absState -> state, offset')
    w('  int state = absState/(w+1);')
    w('  if (true || state < minErrors.length) {')
    w('    int offset = absState%(w+1);')
    w('    assert offset >= 0;')
    w('    return w - offset + minErrors[state] <= %d;' % n)
    w('  } else {')
    w('    return false;')
    w('  }')
    w('}')
  if MODE == 'array' and PACKED:
    # we moved into super class
    if False:
      w('')
      v = 2
      l = []
      for i in range(63):
        l.append(hex(v-1))
        v *= 2
      w('private final static long[] MASKS = new long[] {%s};' % ','.join(l), indent=1)
      w('')
      # unpack in java
      w('private int unpack(long[] data, int index, int bitsPerValue) {')
      w('  final long bitLoc = bitsPerValue * index;')
      w('  final int dataLoc = (int) (bitLoc >> %d);' % LOG2_WORD)
      w('  final int bitStart = (int) (bitLoc & %d);' % (WORD-1))
      w('  //System.out.println("index=" + index + " dataLoc=" + dataLoc + " bitStart=" + bitStart + " bitsPerV=" + bitsPerValue);')
      w('  if (bitStart + bitsPerValue <= %d) {' % WORD)
      w('    // not split')
      w('    return (int) ((data[dataLoc] >> bitStart) & MASKS[bitsPerValue-1]);')
      w('  } else {')
      w('    // split')
      w('    final int part = %d-bitStart;' % WORD)
      w('    return (int) (((data[dataLoc] >> bitStart) & MASKS[part-1]) +')
      w('      ((data[1+dataLoc] & MASKS[bitsPerValue-part-1]) << part));', indent=1)
      w('  }')
      w('}')
  # class
  w('}')
  w('')
  fileOut = '%s.java' % className
  s = str(w)
  for sub, repl in subs:
    s = s.replace(sub, repl)
  open(fileOut, 'wb').write(s)
  print 'Wrote %s [%d lines; %.1f KB]' % \
        (fileOut, len(w.l), os.path.getsize(fileOut)/1024.)
 def renderList(l):
  """Render a list of strings as a brace-delimited Java array initializer.
  Items are comma-separated with a line break after every fourth item.
  """
  lx = ['    ']
  for i, item in enumerate(l):
    if i > 0:
      lx.append(',')
      if i % 4 == 0:
        lx.append('\n    ')
    lx.append(item)
  return '{\n%s\n  }' % ''.join(lx)
 # MASKS[i] has the low (i + 1) bits set; pack() and unpack() use it to split
 # values across word boundaries. The loop leaves v and i as module-level names.
 MASKS = []
 v = 2
 for i in xrange(63):
  MASKS.append(v-1)
  v *= 2
 # packs into longs; returns long[], numBits
 def pack(l):
  """Bit-pack the non-negative ints in l into WORD-bit words.
  Every value takes the same number of bits, derived from max(l) with a
  minimum of 1. Values fill each word from the low bits up and may straddle
  two words. Returns (words, bitsPerValue).
  """
  bitsPerValue = max(1, int(math.ceil(math.log(max(l)+1)/math.log(2.0))))
  free = WORD
  word = 0
  words = []
  for value in l:
    if word > 0:
      bitsUsed = math.ceil(math.log(word)/math.log(2.0))
      assert bitsUsed <= (WORD-free), 'bitsLeft=%s (%s-%s=%s) bitsUsed=%s' % (free, WORD, free, WORD-free, bitsUsed)
    if free < bitsPerValue:
      # split
      # bottom `free` bits go in current word:
      word += (value & MASKS[free-1]) << (WORD-free)
      words.append(word)
      # remaining high bits start the next word
      word = value >> free
      free = WORD - (bitsPerValue-free)
      continue
    word += value << (WORD-free)
    free -= bitsPerValue
    if free == 0:
      words.append(word)
      free = WORD
      word = 0
  if free < WORD:
    # flush the partially filled last word
    words.append(word)
  # verify(l, words, bitsPerValue)
  return words, bitsPerValue
 def verify(data, packedData, bitsPerValue):
  """Assert that unpacking each index of packedData gives back data."""
  for index, expected in enumerate(data):
    assert expected == unpack(packedData, index, bitsPerValue)
 def unpack(data, index, bitsPerValue):
  """Return the index-th bitsPerValue-wide value stored in the word list data."""
  bitLoc = bitsPerValue * index
  wordIndex = int(bitLoc >> LOG2_WORD)
  shift = int(bitLoc & (WORD-1))
  low = data[wordIndex] >> shift
  if shift + bitsPerValue <= WORD:
    # not split
    return int(low & MASKS[bitsPerValue-1])
  # split: low bits come from this word, the rest from the next one
  lowBits = WORD - shift
  high = data[1+wordIndex] & MASKS[bitsPerValue-lowBits-1]
  return int((low & MASKS[lowBits-1]) + (high << lowBits))
 if __name__ == '__main__':
  # Refuse to run under -O, which disables the assert statements this script uses.
  if __debug__:
    main()
  else:
    print
    print 'ERROR: please run without -O'
    print
    sys.exit(1)
--- a/fine-lucene/src/main/java/com/fr/third/org/apache/lucene/util/packed/gen_BulkOperation.py
+++ b/fine-lucene/src/main/java/com/fr/third/org/apache/lucene/util/packed/gen_BulkOperation.py
@ -1,335 +0,0 @@
 #! /usr/bin/env python
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
 # The ASF licenses this file to You under the Apache License, Version 2.0
 # (the "License"); you may not use this file except in compliance with
 # the License.  You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from fractions import gcd
 """Code generation for bulk operations"""
 MAX_SPECIALIZED_BITS_PER_VALUE = 24;
 PACKED_64_SINGLE_BLOCK_BPV = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 16, 21, 32]
 OUTPUT_FILE = "BulkOperation.java"
 HEADER = """// This file has been automatically generated, DO NOT EDIT
 package com.fr.third.org.apache.lucene.util.packed;
 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 """
 FOOTER="""
  protected int writeLong(long block, byte[] blocks, int blocksOffset) {
    for (int j = 1; j <= 8; ++j) {
      blocks[blocksOffset++] = (byte) (block >>> (64 - (j << 3)));
    }
    return blocksOffset;
  }
  /**
   * For every number of bits per value, there is a minimum number of
   * blocks (b) / values (v) you need to write in order to reach the next block
   * boundary:
   *  - 16 bits per value -> b=1, v=4
   *  - 24 bits per value -> b=3, v=8
   *  - 50 bits per value -> b=25, v=32
   *  - 63 bits per value -> b=63, v=64
   *  - ...
   *
   * A bulk read consists in copying <code>iterations*v</code> values that are
   * contained in <code>iterations*b</code> blocks into a <code>long[]</code>
   * (higher values of <code>iterations</code> are likely to yield a better
   * throughput) => this requires n * (b + v) longs in memory.
   *
   * This method computes <code>iterations</code> as
   * <code>ramBudget / (8 * (b + v))</code> (since a long is 8 bytes).
   */
  public final int computeIterations(int valueCount, int ramBudget) {
    final int iterations = (ramBudget >>> 3) / (blockCount() + valueCount());
    if (iterations == 0) {
      // at least 1
      return 1;
    } else if ((iterations - 1) * blockCount() >= valueCount) {
      // don't allocate for more than the size of the reader
      return (int) Math.ceil((double) valueCount / valueCount());
    } else {
      return iterations;
    }
  }
 }
 """
 def is_power_of_two(n):
  """Return True if n is a positive power of two (1, 2, 4, ...)."""
  # n & (n - 1) clears the lowest set bit; zero itself must be excluded explicitly.
  return n > 0 and n & (n - 1) == 0
 def casts(typ):
  """Return the (prefix, suffix) text that casts a Java expression to typ.
  "long" needs no cast, so both parts are empty for it.
  """
  if typ == "long":
    return "", ""
  return "(%s) (" % typ, ")"
 def hexNoLSuffix(n):
  """Return hex(n) without the trailing 'L' that hex() can append."""
  # On 32 bit Python values > (1 << 31)-1 will have L appended by hex function:
  return hex(n).rstrip('L')
 def masks(bits):
  """Return the (prefix, suffix) text that masks a Java long to its low bits.
  A 64-bit value needs no mask, so both parts are empty for bits == 64.
  """
  if bits != 64:
    return "(", " & %sL)" %(hexNoLSuffix((1 << bits) - 1))
  return "", ""
 def get_type(bits):
  """Return the Java primitive type name that is bits wide.
  Supports 8, 16, 32 and 64; any other width fails an assertion.
  """
  typ = {8: "byte", 16: "short", 32: "int", 64: "long"}.get(bits)
  assert typ is not None
  return typ
 def block_value_count(bpv, bits=64):
  """Return (blocks, values): the fewest bits-wide blocks holding whole values.
  Starts from bpv blocks and halves both counts while both are even, so that
  values * bpv == bits * blocks still holds.
  """
  blocks = bpv
  # Floor division keeps both counts ints even under true division.
  values = blocks * bits // bpv
  while blocks % 2 == 0 and values % 2 == 0:
    blocks //= 2
    values //= 2
  assert values * bpv == bits * blocks, "%d values, %d blocks, %d bits per value" %(values, blocks, bpv)
  return (blocks, values)
 def packed64(bpv, f):
  """Write the constructor and decode methods for BulkOperationPacked<bpv> to f.
  For bpv == 64 fixed decode methods are written; for every other width the
  32-bit and 64-bit decoders come from p64_decode().
  """
  blocks, values = block_value_count(bpv)
  f.write("\n")
  f.write("  public BulkOperationPacked%d() {\n" %bpv)
  f.write("    super(%d);\n" %bpv)
  f.write("    assert blockCount() == %d;\n" %blocks)
  f.write("    assert valueCount() == %d;\n" %values)
  f.write("  }\n\n")
  if bpv == 64:
    f.write("""    @Override
    public void decode(long[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
      System.arraycopy(blocks, blocksOffset, values, valuesOffset, valueCount() * iterations);
    }
    @Override
    public void decode(long[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
      throw new UnsupportedOperationException();
    }
    @Override
    public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
      throw new UnsupportedOperationException();
    }
    @Override
    public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
      LongBuffer.wrap(values, valuesOffset, iterations * valueCount()).put(ByteBuffer.wrap(blocks, blocksOffset, 8 * iterations * blockCount()).asLongBuffer());
    }
 """)
  else:
    p64_decode(bpv, f, 32)
    p64_decode(bpv, f, 64)
 def p64_decode(bpv, f, bits):
  """Emit the two Java ``decode`` overrides for one packed bit width.

  Two methods are written to ``f``: one reading ``long[]`` blocks and one
  reading ``byte[]`` blocks.  Both fill a ``values`` array whose element type
  is ``get_type(bits)``.  When ``bits < bpv`` a packed value cannot fit in the
  target element, so both generated bodies only throw
  UnsupportedOperationException.

  Args:
    bpv: number of bits used by each packed value.
    f: writable file-like object receiving the Java source.
    bits: width in bits of the target array element (callers pass 32 and 64).
  """
  _, values = block_value_count(bpv)
  typ = get_type(bits)
  cast_start, cast_end = casts(typ)
  # Mask selecting the low `bpv` bits; shared by both generated decoders.
  mask = (1 << bpv) - 1

  # ---- decode(long[] blocks, ...) ----
  f.write("  @Override\n")
  f.write("  public void decode(long[] blocks, int blocksOffset, %s[] values, int valuesOffset, int iterations) {\n" %typ)
  if bits < bpv:
    f.write("    throw new UnsupportedOperationException();\n")
  else:
    f.write("    for (int i = 0; i < iterations; ++i) {\n")
    if is_power_of_two(bpv):
      # Values never straddle a block: walk one long from its top bits down.
      # A bare Java int literal cannot exceed 2^31 - 1, so wider masks get an
      # L suffix; narrower masks keep the historical suffix-less form.
      mask_literal = ("%d" % mask) if mask <= 0x7FFFFFFF else ("%dL" % mask)
      f.write("      final long block = blocks[blocksOffset++];\n")
      f.write("      for (int shift = %d; shift >= 0; shift -= %d) {\n" %(64 - bpv, bpv))
      f.write("        values[valuesOffset++] = %s(block >>> shift) & %s%s;\n" %(cast_start, mask_literal, cast_end))
      f.write("      }\n")
    else:
      # Fully unrolled: one statement per value of the iteration.
      for i in range(values):
        block_offset = i * bpv // 64
        bit_offset = (i * bpv) % 64
        if bit_offset == 0:
          # start of block
          f.write("      final long block%d = blocks[blocksOffset++];\n" %block_offset)
          f.write("      values[valuesOffset++] = %sblock%d >>> %d%s;\n" %(cast_start, block_offset, 64 - bpv, cast_end))
        elif bit_offset + bpv == 64:
          # end of block
          f.write("      values[valuesOffset++] = %sblock%d & %dL%s;\n" %(cast_start, block_offset, mask, cast_end))
        elif bit_offset + bpv < 64:
          # middle of block
          f.write("      values[valuesOffset++] = %s(block%d >>> %d) & %dL%s;\n" %(cast_start, block_offset, 64 - bit_offset - bpv, mask, cast_end))
        else:
          # value spans across 2 blocks
          mask1 = (1 << (64 - bit_offset)) - 1
          shift1 = bit_offset + bpv - 64
          shift2 = 64 - shift1
          f.write("      final long block%d = blocks[blocksOffset++];\n" %(block_offset + 1))
          f.write("      values[valuesOffset++] = %s((block%d & %dL) << %d) | (block%d >>> %d)%s;\n" %(cast_start, block_offset, mask1, shift1, block_offset + 1, shift2, cast_end))
    f.write("    }\n")
  f.write("  }\n\n")

  # ---- decode(byte[] blocks, ...) ----
  _, byte_values = block_value_count(bpv, 8)
  f.write("  @Override\n")
  f.write("  public void decode(byte[] blocks, int blocksOffset, %s[] values, int valuesOffset, int iterations) {\n" %typ)
  if bits < bpv:
    f.write("    throw new UnsupportedOperationException();\n")
  elif is_power_of_two(bpv) and bpv < 8:
    # Several values per byte: peel them off from the high bits down.
    f.write("    for (int j = 0; j < 8 * iterations; ++j) {\n")
    f.write("      final byte block = blocks[blocksOffset++];\n")
    for shift in range(8 - bpv, 0, -bpv):
      f.write("      values[valuesOffset++] = (block >>> %d) & %d;\n" %(shift, mask))
    f.write("      values[valuesOffset++] = block & %d;\n" %mask)
    f.write("    }\n")
  elif bpv == 8:
    f.write("    for (int j = 0; j < 8 * iterations; ++j) {\n")
    f.write("      values[valuesOffset++] = blocks[blocksOffset++] & 0xFF;\n")
    f.write("    }\n")
  elif is_power_of_two(bpv) and bpv > 8:
    # Whole bytes per value, most significant byte first.
    f.write("    for (int j = 0; j < %d * iterations; ++j) {\n" %(64 // bpv))
    m = "0xFF" if bits <= 32 else "0xFFL"
    f.write("      values[valuesOffset++] =")
    for i in range(bpv // 8 - 1):
      # Each successive byte sits 8 bits lower than the previous one.  A
      # constant shift of `bpv - 8` is only right for the first byte, i.e.
      # it only happened to work for bpv == 16.
      f.write(" ((blocks[blocksOffset++] & %s) << %d) |" %(m, bpv - 8 * (i + 1)))
    f.write(" (blocks[blocksOffset++] & %s);\n" %m)
    f.write("    }\n")
  else:
    # General case, fully unrolled over the values of one byte-level iteration.
    f.write("    for (int i = 0; i < 8 * iterations; ++i) {\n")
    for i in range(byte_values):
      byte_start = i * bpv // 8
      bit_start = (i * bpv) % 8
      byte_end = ((i + 1) * bpv - 1) // 8
      bit_end = ((i + 1) * bpv - 1) % 8
      # Left shift that aligns byte `b` inside the value being assembled.
      shift = lambda b: 8 * (byte_end - b - 1) + 1 + bit_end
      if bit_start == 0:
        # The value starts on a byte boundary, so its first byte is new.
        f.write("      final %s byte%d = blocks[blocksOffset++] & 0xFF;\n" %(typ, byte_start))
      for b in range(byte_start + 1, byte_end + 1):
        f.write("      final %s byte%d = blocks[blocksOffset++] & 0xFF;\n" %(typ, b))
      f.write("      values[valuesOffset++] =")
      if byte_start == byte_end:
        # The value lives entirely inside a single byte.
        if bit_start == 0:
          if bit_end == 7:
            f.write(" byte%d" %byte_start)
          else:
            f.write(" byte%d >>> %d" %(byte_start, 7 - bit_end))
        else:
          if bit_end == 7:
            f.write(" byte%d & %d" %(byte_start, 2 ** (8 - bit_start) - 1))
          else:
            f.write(" (byte%d >>> %d) & %d" %(byte_start, 7 - bit_end, 2 ** (bit_end - bit_start + 1) - 1))
      else:
        # The value spans several bytes: head | full middle bytes | tail.
        if bit_start == 0:
          f.write(" (byte%d << %d)" %(byte_start, shift(byte_start)))
        else:
          f.write(" ((byte%d & %d) << %d)" %(byte_start, 2 ** (8 - bit_start) - 1, shift(byte_start)))
        for b in range(byte_start + 1, byte_end):
          f.write(" | (byte%d << %d)" %(b, shift(b)))
        if bit_end == 7:
          f.write(" | byte%d" %byte_end)
        else:
          f.write(" | (byte%d >>> %d)" %(byte_end, 7 - bit_end))
      f.write(";\n")
    f.write("    }\n")
  f.write("  }\n\n")
 if __name__ == '__main__':
  # Writes OUTPUT_FILE (the BulkOperation dispatcher class) and, for every
  # bit width up to MAX_SPECIALIZED_BITS_PER_VALUE, a dedicated
  # BulkOperationPacked<bpv>.java file.  `with` guarantees both files are
  # closed even when generation fails part-way.
  class_javadoc = '/**\n * Efficient sequential read/write of packed integers.\n */\n'
  with open(OUTPUT_FILE, 'w') as f:
    f.write(HEADER)
    f.write('\n')
    f.write(class_javadoc)
    f.write('abstract class BulkOperation implements PackedInts.Decoder, PackedInts.Encoder {\n')
    f.write('  private static final BulkOperation[] packedBulkOps = new BulkOperation[] {\n')
    for bpv in range(1, 65):
      if bpv > MAX_SPECIALIZED_BITS_PER_VALUE:
        # Too wide for a specialized class: use the generic implementation.
        f.write('    new BulkOperationPacked(%d),\n' % bpv)
        continue
      with open('BulkOperationPacked%d.java' % bpv, 'w') as f2:
        f2.write(HEADER)
        if bpv == 64:
          f2.write('import java.nio.LongBuffer;\n')
          f2.write('import java.nio.ByteBuffer;\n')
          f2.write('\n')
        f2.write(class_javadoc)
        f2.write('final class BulkOperationPacked%d extends BulkOperationPacked {\n' % bpv)
        packed64(bpv, f2)
        f2.write('}\n')
      f.write('    new BulkOperationPacked%d(),\n' % bpv)
    f.write('  };\n')
    f.write('\n')
    f.write('  // NOTE: this is sparse (some entries are null):\n')
    f.write('  private static final BulkOperation[] packedSingleBlockBulkOps = new BulkOperation[] {\n')
    # Entry (bpv - 1) holds the operation for bpv; unsupported widths are null.
    for bpv in range(1, max(PACKED_64_SINGLE_BLOCK_BPV) + 1):
      if bpv in PACKED_64_SINGLE_BLOCK_BPV:
        f.write('    new BulkOperationPackedSingleBlock(%d),\n' % bpv)
      else:
        f.write('    null,\n')
    f.write('  };\n')
    f.write('\n')
    f.write("\n")
    f.write("  public static BulkOperation of(PackedInts.Format format, int bitsPerValue) {\n")
    f.write("    switch (format) {\n")
    f.write("    case PACKED:\n")
    f.write("      assert packedBulkOps[bitsPerValue - 1] != null;\n")
    f.write("      return packedBulkOps[bitsPerValue - 1];\n")
    f.write("    case PACKED_SINGLE_BLOCK:\n")
    f.write("      assert packedSingleBlockBulkOps[bitsPerValue - 1] != null;\n")
    f.write("      return packedSingleBlockBulkOps[bitsPerValue - 1];\n")
    f.write("    default:\n")
    f.write("      throw new AssertionError();\n")
    f.write("    }\n")
    f.write("  }\n")
    f.write(FOOTER)
--- a/fine-lucene/src/main/java/com/fr/third/org/apache/lucene/util/packed/gen_Direct.py
+++ b/fine-lucene/src/main/java/com/fr/third/org/apache/lucene/util/packed/gen_Direct.py
@ -1,175 +0,0 @@
 #! /usr/bin/env python
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
 # The ASF licenses this file to You under the Apache License, Version 2.0
 # (the "License"); you may not use this file except in compliance with
 # the License.  You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Preamble written verbatim at the top of every generated Direct<bpv>.java
 # file: generated-file notice, package declaration, license and imports.
 HEADER="""// This file has been automatically generated, DO NOT EDIT
 package com.fr.third.org.apache.lucene.util.packed;
 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import com.fr.third.org.apache.lucene.store.DataInput;
 import com.fr.third.org.apache.lucene.util.RamUsageEstimator;
 import java.io.IOException;
 import java.util.Arrays;
 """
 # Bits per value -> Java primitive used as the backing array element type.
 # One Direct<bpv>.java file is generated per key.
 TYPES = {8: "byte", 16: "short", 32: "int", 64: "long"}
 # Bits per value -> Java suffix appended after a stored element when it is
 # read back as a long, keeping only its low <bpv> bits (empty for long).
 MASKS = {8: " & 0xFFL", 16: " & 0xFFFFL", 32: " & 0xFFFFFFFFL", 64: ""}
 # Bits per value -> Java cast prefix applied when a long is stored into the
 # backing array (empty for long).
 CASTS = {8: "(byte) ", 16: "(short) ", 32: "(int) ", 64: ""}
 if __name__ == '__main__':
  # Emit one Direct<bpv>.java per entry of TYPES.  Each file is opened with
  # `with` so it is closed even if a write fails.
  for bpv in TYPES:
    java_type = TYPES[bpv]
    reader = "read%s" % java_type.title()  # e.g. readByte / readLong
    per_long = 64 // bpv                   # elements per 64-bit word
    with open("Direct%d.java" %bpv, 'w') as f:
      f.write(HEADER)
      f.write("/**\n * Direct wrapping of %d-bits values to a backing array.\n * @lucene.internal\n */\n" %bpv)
      f.write("final class Direct%d extends PackedInts.MutableImpl {\n" %bpv)
      f.write("  final %s[] values;\n\n" %java_type)
      f.write("  Direct%d(int valueCount) {\n" %bpv)
      f.write("    super(valueCount, %d);\n" %bpv)
      f.write("    values = new %s[valueCount];\n" %java_type)
      f.write("  }\n\n")
      f.write("  Direct%d(DataInput in, int valueCount) throws IOException {\n" %bpv)
      f.write("    this(valueCount);\n")
      f.write("    for (int i = 0; i < valueCount; ++i) {\n")
      f.write("      values[i] = in.%s();\n" %reader)
      f.write("    }\n")
      if bpv != 64:
        # The generated constructor reads and discards elements until the
        # count consumed is a multiple of `per_long`.
        f.write("    final int mod = valueCount %% %d;\n" %per_long)
        f.write("    if (mod != 0) {\n")
        f.write("      for (int i = mod; i < %d; ++i) {\n" %per_long)
        f.write("        in.%s();\n" %reader)
        f.write("      }\n")
        f.write("    }\n")
      f.write("  }\n")
      # Single-value accessors and array exposure, common to every width.
      f.write("""
  @Override
  public long get(final int index) {
    return values[index]%s;
  }
  public void set(final int index, final long value) {
    values[index] = %s(value);
  }
  public long ramBytesUsed() {
    return RamUsageEstimator.sizeOf(values);
  }
  public void clear() {
    Arrays.fill(values, %s0L);
  }
  @Override
  public Object getArray() {
    return values;
  }
  @Override
  public boolean hasArray() {
    return true;
  }
 """ %(MASKS[bpv], CASTS[bpv], CASTS[bpv]))
      if bpv == 64:
        # long[] backing array: bulk operations are plain array copies.
        f.write("""
  @Override
  public int get(int index, long[] arr, int off, int len) {
    assert len > 0 : "len must be > 0 (got " + len + ")";
    assert index >= 0 && index < valueCount;
    assert off + len <= arr.length;
    final int gets = Math.min(valueCount - index, len);
    System.arraycopy(values, index, arr, off, gets);
    return gets;
  }
  public int set(int index, long[] arr, int off, int len) {
    assert len > 0 : "len must be > 0 (got " + len + ")";
    assert index >= 0 && index < valueCount;
    assert off + len <= arr.length;
    final int sets = Math.min(valueCount - index, len);
    System.arraycopy(arr, off, values, index, sets);
    return sets;
  }
  @Override
  public void fill(int fromIndex, int toIndex, long val) {
    Arrays.fill(values, fromIndex, toIndex, val);
  }
 """)
      else:
        # Narrower backing array: each element is masked or cast individually.
        f.write("""
  @Override
  public int get(int index, long[] arr, int off, int len) {
    assert len > 0 : "len must be > 0 (got " + len + ")";
    assert index >= 0 && index < valueCount;
    assert off + len <= arr.length;
    final int gets = Math.min(valueCount - index, len);
    for (int i = index, o = off, end = index + gets; i < end; ++i, ++o) {
      arr[o] = values[i]%s;
    }
    return gets;
  }
  public int set(int index, long[] arr, int off, int len) {
    assert len > 0 : "len must be > 0 (got " + len + ")";
    assert index >= 0 && index < valueCount;
    assert off + len <= arr.length;
    final int sets = Math.min(valueCount - index, len);
    for (int i = index, o = off, end = index + sets; i < end; ++i, ++o) {
      values[i] = %sarr[o];
    }
    return sets;
  }
  @Override
  public void fill(int fromIndex, int toIndex, long val) {
    assert val == (val%s);
    Arrays.fill(values, fromIndex, toIndex, %sval);
  }
 """ %(MASKS[bpv], CASTS[bpv], MASKS[bpv], CASTS[bpv]))
      f.write("}\n")
--- a/fine-lucene/src/main/java/com/fr/third/org/apache/lucene/util/packed/gen_Packed64SingleBlock.py
+++ b/fine-lucene/src/main/java/com/fr/third/org/apache/lucene/util/packed/gen_Packed64SingleBlock.py
@ -1,291 +0,0 @@
 #! /usr/bin/env python
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
 # The ASF licenses this file to You under the Apache License, Version 2.0
 # (the "License"); you may not use this file except in compliance with
 # the License.  You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Bit widths for which a Packed64SingleBlock<bpv> subclass is generated.
 # Keep this list sorted ascending: its last element is substituted below as
 # MAX_SUPPORTED_BITS_PER_VALUE, and the generated isSupported() runs
 # Arrays.binarySearch over it.
 SUPPORTED_BITS_PER_VALUE = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 16, 21, 32]
 # First part of the generated Packed64SingleBlock.java: preamble plus the
 # abstract base class body, left open so the script can append the factory
 # and the per-width subclasses.  The text is a %-format template filled with
 # the two values at the end of the statement, which is why every literal
 # Java '%' operator is written as '%%'.
 HEADER="""// This file has been automatically generated, DO NOT EDIT
 package com.fr.third.org.apache.lucene.util.packed;
 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to You under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
 import java.io.IOException;
 import java.util.Arrays;
 import com.fr.third.org.apache.lucene.store.DataInput;
 import com.fr.third.org.apache.lucene.util.RamUsageEstimator;
 /**
 * This class is similar to {@link Packed64} except that it trades space for
 * speed by ensuring that a single block needs to be read/written in order to
 * read/write a value.
 */
 abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
  public static final int MAX_SUPPORTED_BITS_PER_VALUE = %d;
  private static final int[] SUPPORTED_BITS_PER_VALUE = new int[] {%s};
  public static boolean isSupported(int bitsPerValue) {
    return Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) >= 0;
  }
  private static int requiredCapacity(int valueCount, int valuesPerBlock) {
    return valueCount / valuesPerBlock
        + (valueCount %% valuesPerBlock == 0 ? 0 : 1);
  }
  final long[] blocks;
  Packed64SingleBlock(int valueCount, int bitsPerValue) {
    super(valueCount, bitsPerValue);
    assert isSupported(bitsPerValue);
    final int valuesPerBlock = 64 / bitsPerValue;
    blocks = new long[requiredCapacity(valueCount, valuesPerBlock)];
  }
  @Override
  public void clear() {
    Arrays.fill(blocks, 0L);
  }
  public long ramBytesUsed() {
    return RamUsageEstimator.sizeOf(blocks);
  }
  @Override
  public int get(int index, long[] arr, int off, int len) {
    assert len > 0 : "len must be > 0 (got " + len + ")";
    assert index >= 0 && index < valueCount;
    len = Math.min(len, valueCount - index);
    assert off + len <= arr.length;
    final int originalIndex = index;
    // go to the next block boundary
    final int valuesPerBlock = 64 / bitsPerValue;
    final int offsetInBlock = index %% valuesPerBlock;
    if (offsetInBlock != 0) {
      for (int i = offsetInBlock; i < valuesPerBlock && len > 0; ++i) {
        arr[off++] = get(index++);
        --len;
      }
      if (len == 0) {
        return index - originalIndex;
      }
    }
    // bulk get
    assert index %% valuesPerBlock == 0;
    final PackedInts.Decoder decoder = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
    assert decoder.blockCount() == 1;
    assert decoder.valueCount() == valuesPerBlock;
    final int blockIndex = index / valuesPerBlock;
    final int nblocks = (index + len) / valuesPerBlock - blockIndex;
    decoder.decode(blocks, blockIndex, arr, off, nblocks);
    final int diff = nblocks * valuesPerBlock;
    index += diff; len -= diff;
    if (index > originalIndex) {
      // stay at the block boundary
      return index - originalIndex;
    } else {
      // no progress so far => already at a block boundary but no full block to
      // get
      assert index == originalIndex;
      return super.get(index, arr, off, len);
    }
  }
  @Override
  public int set(int index, long[] arr, int off, int len) {
    assert len > 0 : "len must be > 0 (got " + len + ")";
    assert index >= 0 && index < valueCount;
    len = Math.min(len, valueCount - index);
    assert off + len <= arr.length;
    final int originalIndex = index;
    // go to the next block boundary
    final int valuesPerBlock = 64 / bitsPerValue;
    final int offsetInBlock = index %% valuesPerBlock;
    if (offsetInBlock != 0) {
      for (int i = offsetInBlock; i < valuesPerBlock && len > 0; ++i) {
        set(index++, arr[off++]);
        --len;
      }
      if (len == 0) {
        return index - originalIndex;
      }
    }
    // bulk set
    assert index %% valuesPerBlock == 0;
    final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
    assert op.blockCount() == 1;
    assert op.valueCount() == valuesPerBlock;
    final int blockIndex = index / valuesPerBlock;
    final int nblocks = (index + len) / valuesPerBlock - blockIndex;
    op.encode(arr, off, blocks, blockIndex, nblocks);
    final int diff = nblocks * valuesPerBlock;
    index += diff; len -= diff;
    if (index > originalIndex) {
      // stay at the block boundary
      return index - originalIndex;
    } else {
      // no progress so far => already at a block boundary but no full block to
      // set
      assert index == originalIndex;
      return super.set(index, arr, off, len);
    }
  }
  @Override
  public void fill(int fromIndex, int toIndex, long val) {
    assert fromIndex >= 0;
    assert fromIndex <= toIndex;
    assert PackedInts.bitsRequired(val) <= bitsPerValue;
    final int valuesPerBlock = 64 / bitsPerValue;
    if (toIndex - fromIndex <= valuesPerBlock << 1) {
      // there needs to be at least one full block to set for the block
      // approach to be worth trying
      super.fill(fromIndex, toIndex, val);
      return;
    }
    // set values naively until the next block start
    int fromOffsetInBlock = fromIndex %% valuesPerBlock;
    if (fromOffsetInBlock != 0) {
      for (int i = fromOffsetInBlock; i < valuesPerBlock; ++i) {
        set(fromIndex++, val);
      }
      assert fromIndex %% valuesPerBlock == 0;
    }
    // bulk set of the inner blocks
    final int fromBlock = fromIndex / valuesPerBlock;
    final int toBlock = toIndex / valuesPerBlock;
    assert fromBlock * valuesPerBlock == fromIndex;
    long blockValue = 0L;
    for (int i = 0; i < valuesPerBlock; ++i) {
      blockValue = blockValue | (val << (i * bitsPerValue));
    }
    Arrays.fill(blocks, fromBlock, toBlock, blockValue);
    // fill the gap
    for (int i = valuesPerBlock * toBlock; i < toIndex; ++i) {
      set(i, val);
    }
  }
  @Override
  protected PackedInts.Format getFormat() {
    return PackedInts.Format.PACKED_SINGLE_BLOCK;
  }
  @Override
  public String toString() {
    return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
        + ", size=" + size() + ", elements.length=" + blocks.length + ")";
  }
  public static Packed64SingleBlock create(DataInput in,
      int valueCount, int bitsPerValue) throws IOException {
    Packed64SingleBlock reader = create(valueCount, bitsPerValue);
    for (int i = 0; i < reader.blocks.length; ++i) {
      reader.blocks[i] = in.readLong();
    }
    return reader;
  }
 """ %(SUPPORTED_BITS_PER_VALUE[-1], ", ".join(map(str, SUPPORTED_BITS_PER_VALUE)))
 # Closes the Packed64SingleBlock class body opened by HEADER.
 FOOTER = "}"
 if __name__ == '__main__':
  # Generate Packed64SingleBlock.java: the HEADER template, the create()
  # factory, then one nested subclass per supported width.  `with` ensures
  # the file is closed even if generation fails part-way.
  with open("Packed64SingleBlock.java", 'w') as f:
    f.write(HEADER)
    f.write("  public static Packed64SingleBlock create(int valueCount, int bitsPerValue) {\n")
    f.write("    switch (bitsPerValue) {\n")
    for bpv in SUPPORTED_BITS_PER_VALUE:
      f.write("      case %d:\n" %bpv)
      f.write("        return new Packed64SingleBlock%d(valueCount);\n" %bpv)
    f.write("      default:\n")
    # Report the Java method's run-time argument.  Interpolating the Python
    # loop variable here would hard-code the last supported width into the
    # message regardless of what the caller actually passed.
    f.write("        throw new IllegalArgumentException(\"Unsupported number of bits per value: \" + bitsPerValue);\n")
    f.write("    }\n")
    f.write("  }\n\n")
    for bpv in SUPPORTED_BITS_PER_VALUE:
      # log_2 is the exponent when bpv is a power of two, otherwise None.
      log_2 = 0
      while (1 << log_2) < bpv:
        log_2 += 1
      if (1 << log_2) != bpv:
        log_2 = None
      values_per_block = 64 // bpv
      # Java statements locating a value: block index `o`, slot `b` within
      # the block, and the bit `shift` of that slot.  get() and set() share
      # them verbatim.
      if log_2 is not None:
        # Power-of-two width: values per block is 2 ** (6 - log_2), so the
        # division, modulo and multiplication reduce to shifts and a mask.
        locate = [
          "      final int o = index >>> %d;\n" %(6 - log_2),
          "      final int b = index & %d;\n" %((1 << (6 - log_2)) - 1),
          "      final int shift = b << %d;\n" %log_2,
        ]
      else:
        locate = [
          "      final int o = index / %d;\n" %values_per_block,
          "      final int b = index %% %d;\n" %values_per_block,
          "      final int shift = b * %d;\n" %bpv,
        ]
      value_mask = (1 << bpv) - 1
      f.write("  static class Packed64SingleBlock%d extends Packed64SingleBlock {\n\n" %bpv)
      f.write("    Packed64SingleBlock%d(int valueCount) {\n" %bpv)
      f.write("      super(valueCount, %d);\n" %bpv)
      f.write("    }\n\n")
      f.write("    @Override\n")
      f.write("    public long get(int index) {\n")
      for line in locate:
        f.write(line)
      f.write("      return (blocks[o] >>> shift) & %dL;\n" %value_mask)
      f.write("    }\n\n")
      f.write("    @Override\n")
      f.write("    public void set(int index, long value) {\n")
      for line in locate:
        f.write(line)
      f.write("      blocks[o] = (blocks[o] & ~(%dL << shift)) | (value << shift);\n" %value_mask)
      f.write("    }\n\n")
      f.write("  }\n\n")
    f.write(FOOTER)
--- a/fine-lucene/src/main/java/com/fr/third/org/apache/lucene/util/packed/gen_PackedThreeBlocks.py
+++ b/fine-lucene/src/main/java/com/fr/third/org/apache/lucene/util/packed/gen_PackedThreeBlocks.py
@ -1,161 +0,0 @@
 #! /usr/bin/env python
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
 # The ASF licenses this file to You under the Apache License, Version 2.0
 # (the "License"); you may not use this file except in compliance with
 # the License.  You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Preamble written verbatim at the top of every generated
 # Packed<bpv>ThreeBlocks.java file: generated-file notice, package
 # declaration, license and imports.
 HEADER="""// This file has been automatically generated, DO NOT EDIT
 package com.fr.third.org.apache.lucene.util.packed;
 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import com.fr.third.org.apache.lucene.store.DataInput;
 import com.fr.third.org.apache.lucene.util.RamUsageEstimator;
 import java.io.IOException;
 import java.util.Arrays;
 """
 # Block width in bits -> Java primitive used for the backing array.  One
 # Packed<bpv>ThreeBlocks.java file is generated per key; each value occupies
 # three blocks, i.e. 3 * bpv bits.
 TYPES = {8: "byte", 16: "short"}
 # Block width -> Java suffix that masks a stored block to its low <bpv> bits
 # when it is read as a long.  Only the keys also present in TYPES are looked
 # up by this script; the 32 and 64 entries are unused here.
 MASKS = {8: " & 0xFFL", 16: " & 0xFFFFL", 32: " & 0xFFFFFFFFL", 64: ""}
 # Block width -> Java cast prefix applied when a long is stored into a block.
 # As with MASKS, only the 8 and 16 entries are used by this script.
 CASTS = {8: "(byte) ", 16: "(short) ", 32: "(int) ", 64: ""}
 if __name__ == '__main__':
  # Emit one Packed<bpv>ThreeBlocks.java per entry of TYPES.  Each value is
  # stored as three consecutive blocks of `bpv` bits, most significant first.
  # `with` ensures the file is closed even if a write fails.
  for bpv in TYPES:
    java_type = TYPES[bpv]
    mask = MASKS[bpv]
    cast = CASTS[bpv]
    reader = "read%s" % java_type.title()  # e.g. readByte / readShort
    per_long = 64 // bpv                   # blocks per 64-bit word
    with open("Packed%dThreeBlocks.java" %bpv, 'w') as f:
      f.write(HEADER)
      f.write("/**\n * Packs integers into 3 %ss (%d bits per value).\n * @lucene.internal\n */\n" %(java_type, bpv*3))
      f.write("final class Packed%dThreeBlocks extends PackedInts.MutableImpl {\n" %bpv)
      f.write("  final %s[] blocks;\n\n" %java_type)
      f.write("  public static final int MAX_SIZE = Integer.MAX_VALUE / 3;\n\n")
      f.write("  Packed%dThreeBlocks(int valueCount) {\n" %bpv)
      f.write("    super(valueCount, %d);\n" %(bpv*3))
      f.write("    if (valueCount > MAX_SIZE) {\n")
      f.write("      throw new ArrayIndexOutOfBoundsException(\"MAX_SIZE exceeded\");\n")
      f.write("    }\n")
      f.write("    blocks = new %s[valueCount * 3];\n" %java_type)
      f.write("  }\n\n")
      f.write("  Packed%dThreeBlocks(DataInput in, int valueCount) throws IOException {\n" %bpv)
      f.write("    this(valueCount);\n")
      f.write("    for (int i = 0; i < 3 * valueCount; ++i) {\n")
      f.write("      blocks[i] = in.%s();\n" %reader)
      f.write("    }\n")
      # The generated constructor reads and discards blocks until the count
      # consumed is a multiple of `per_long`.
      f.write("    final int mod = blocks.length %% %d;\n" %per_long)
      f.write("    if (mod != 0) {\n")
      f.write("      for (int i = mod; i < %d; ++i) {\n" %per_long)
      f.write("         in.%s();\n" %reader)
      f.write("      }\n")
      f.write("    }\n")
      f.write("  }\n")
      # The template below also closes the class.  Its arguments are grouped
      # per generated method, in template order.
      f.write("""
  @Override
  public long get(int index) {
    final int o = index * 3;
    return (blocks[o]%s) << %d | (blocks[o+1]%s) << %d | (blocks[o+2]%s);
  }
  @Override
  public int get(int index, long[] arr, int off, int len) {
    assert len > 0 : "len must be > 0 (got " + len + ")";
    assert index >= 0 && index < valueCount;
    assert off + len <= arr.length;
    final int gets = Math.min(valueCount - index, len);
    for (int i = index * 3, end = (index + gets) * 3; i < end; i+=3) {
      arr[off++] = (blocks[i]%s) << %d | (blocks[i+1]%s) << %d | (blocks[i+2]%s);
    }
    return gets;
  }
  @Override
  public void set(int index, long value) {
    final int o = index * 3;
    blocks[o] = %s(value >>> %d);
    blocks[o+1] = %s(value >>> %d);
    blocks[o+2] = %svalue;
  }
  @Override
  public int set(int index, long[] arr, int off, int len) {
    assert len > 0 : "len must be > 0 (got " + len + ")";
    assert index >= 0 && index < valueCount;
    assert off + len <= arr.length;
    final int sets = Math.min(valueCount - index, len);
    for (int i = off, o = index * 3, end = off + sets; i < end; ++i) {
      final long value = arr[i];
      blocks[o++] = %s(value >>> %d);
      blocks[o++] = %s(value >>> %d);
      blocks[o++] = %svalue;
    }
    return sets;
  }
  @Override
  public void fill(int fromIndex, int toIndex, long val) {
    final %s block1 = %s(val >>> %d);
    final %s block2 = %s(val >>> %d);
    final %s block3 = %sval;
    for (int i = fromIndex * 3, end = toIndex * 3; i < end; i += 3) {
      blocks[i] = block1;
      blocks[i+1] = block2;
      blocks[i+2] = block3;
    }
  }
  @Override
  public void clear() {
    Arrays.fill(blocks, %s0);
  }
  public long ramBytesUsed() {
    return RamUsageEstimator.sizeOf(blocks);
  }
  @Override
  public String toString() {
    return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
        + ", size=" + size() + ", elements.length=" + blocks.length + ")";
  }
 }
 """ %(
        # get(index)
        mask, 2*bpv, mask, bpv, mask,
        # get(index, arr, off, len)
        mask, 2*bpv, mask, bpv, mask,
        # set(index, value)
        cast, 2*bpv, cast, bpv, cast,
        # set(index, arr, off, len)
        cast, 2*bpv, cast, bpv, cast,
        # fill(fromIndex, toIndex, val)
        java_type, cast, 2*bpv, java_type, cast, bpv, java_type, cast,
        # clear()
        cast))