Merge pull request #10054 in CORE/base-third from bugfix/10.0 to feature/10.0

* commit '3a3dc27c330aaee37fc91fa4e78894493ce240bf': REPORT-110010 删除lucene里的py腳本
1 year ago · 04fa2327eb
7 changed files with 0 additions and 2367 deletions
--- a/fine-lucene/src/main/java/com/fr/third/org/apache/lucene/analysis/charfilter/htmlentity.py
+++ b/fine-lucene/src/main/java/com/fr/third/org/apache/lucene/analysis/charfilter/htmlentity.py
@@ -1,539 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import re
-
-# A simple python script to generate an HTML entity map and a regex alternation
-# for inclusion in HTMLStripCharFilter.jflex.
-
-def main():
-  print get_apache_license()
-  codes = {}
-  regex = re.compile(r'\s*<!ENTITY\s+(\S+)\s+"&(?:#38;)?#(\d+);"')
-  for line in get_entity_text().split('\n'):
-    match = regex.match(line)
-    if match:
-      key = match.group(1)
-      if   key == 'quot': codes[key] = r'\"'
-      elif key == 'nbsp': codes[key] = ' ';
-      else              : codes[key] = r'\u%04X' % int(match.group(2))
-
-  keys = sorted(codes)
-
-  first_entry = True
-  output_line = 'CharacterEntities = ( '
-  for key in keys:
-    new_entry = ('"%s"' if first_entry else ' | "%s"') % key
-    first_entry = False
-    if len(output_line) + len(new_entry) >= 80:
-      print output_line
-      output_line = '                   '
-    output_line += new_entry
-    if key in ('quot','copy','gt','lt','reg','amp'):
-      new_entry = ' | "%s"' % key.upper()
-      if len(output_line) + len(new_entry) >= 80:
-        print output_line
-        output_line = '                   '
-      output_line += new_entry
-  print output_line, ')'
-
-  print '%{'
-  print '  private static final Map<String,String> upperCaseVariantsAccepted'
-  print '      = new HashMap<String,String>();'
-  print '  static {'
-  print '    upperCaseVariantsAccepted.put("quot", "QUOT");'
-  print '    upperCaseVariantsAccepted.put("copy", "COPY");'
-  print '    upperCaseVariantsAccepted.put("gt", "GT");'
-  print '    upperCaseVariantsAccepted.put("lt", "LT");'
-  print '    upperCaseVariantsAccepted.put("reg", "REG");'
-  print '    upperCaseVariantsAccepted.put("amp", "AMP");'
-  print '  }'
-  print '  private static final CharArrayMap<Character> entityValues'
-  print '      = new CharArrayMap<Character>(Version.LUCENE_40, %i, false);' % len(keys)
-  print '  static {'
-  print '    String[] entities = {'
-  output_line = '     '
-  for key in keys:
-    new_entry = ' "%s", "%s",' % (key, codes[key])
-    if len(output_line) + len(new_entry) >= 80:
-      print output_line
-      output_line = '     '
-    output_line += new_entry
-  print output_line[:-1]
-  print '    };'
-  print '    for (int i = 0 ; i < entities.length ; i += 2) {'
-  print '      Character value = entities[i + 1].charAt(0);'
-  print '      entityValues.put(entities[i], value);'
-  print '      String upperCaseVariant = upperCaseVariantsAccepted.get(entities[i]);'
-  print '      if (upperCaseVariant != null) {'
-  print '        entityValues.put(upperCaseVariant, value);'
-  print '      }'
-  print '    }'
-  print "  }"
-  print "%}"
-
-def get_entity_text():
-# The text below is taken verbatim from
-# <http://www.w3.org/TR/REC-html40/sgml/entities.html>:
-  text = r"""
-F.1. XHTML Character Entities
-
-XHTML DTDs make available a standard collection of named character entities. Those entities are defined in this section.
-F.1.1. XHTML Latin 1 Character Entities
-
-You can download this version of this file from http://www.w3.org/TR/2010/REC-xhtml-modularization/DTD/xhtml-lat1.ent. The latest version is available at http://www.w3.org/MarkUp/DTD/xhtml-lat1.ent.
-
-<!-- ...................................................................... -->
-<!-- XML-compatible ISO Latin 1 Character Entity Set for XHTML ............ -->
-<!-- file: xhtml-lat1.ent
-
-     Typical invocation:
-
-       <!ENTITY % xhtml-lat1
-           PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN"
-                  "xhtml-lat1.ent" >
-       %xhtml-lat1;
-
-     This DTD module is identified by the PUBLIC and SYSTEM identifiers:
-
-       PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN"
-       SYSTEM "http://www.w3.org/MarkUp/DTD/xhtml-lat1.ent"
-
-     Revision:  $Id: xhtml-lat1.ent,v 4.1 2001/04/10 09:34:14 altheim Exp $ SMI
-
-     Portions (C) International Organization for Standardization 1986:
-     Permission to copy in any form is granted for use with conforming
-     SGML systems and applications as defined in ISO 8879, provided
-     this notice is included in all copies.
-->
-
-<!ENTITY nbsp   "&#160;" ><!-- no-break space = non-breaking space, U+00A0 ISOnum -->
-<!ENTITY iexcl  "&#161;" ><!-- inverted exclamation mark, U+00A1 ISOnum -->
-<!ENTITY cent   "&#162;" ><!-- cent sign, U+00A2 ISOnum -->
-<!ENTITY pound  "&#163;" ><!-- pound sign, U+00A3 ISOnum -->
-<!ENTITY curren "&#164;" ><!-- currency sign, U+00A4 ISOnum -->
-<!ENTITY yen    "&#165;" ><!-- yen sign = yuan sign, U+00A5 ISOnum -->
-<!ENTITY brvbar "&#166;" ><!-- broken bar = broken vertical bar, U+00A6 ISOnum -->
-<!ENTITY sect   "&#167;" ><!-- section sign, U+00A7 ISOnum -->
-<!ENTITY uml    "&#168;" ><!-- diaeresis = spacing diaeresis, U+00A8 ISOdia -->
-<!ENTITY copy   "&#169;" ><!-- copyright sign, U+00A9 ISOnum -->
-<!ENTITY ordf   "&#170;" ><!-- feminine ordinal indicator, U+00AA ISOnum -->
-<!ENTITY laquo  "&#171;" ><!-- left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum -->
-<!ENTITY not    "&#172;" ><!-- not sign, U+00AC ISOnum -->
-<!ENTITY shy    "&#173;" ><!-- soft hyphen = discretionary hyphen, U+00AD ISOnum -->
-<!ENTITY reg    "&#174;" ><!-- registered sign = registered trade mark sign, U+00AE ISOnum -->
-<!ENTITY macr   "&#175;" ><!-- macron = spacing macron = overline = APL overbar, U+00AF ISOdia -->
-<!ENTITY deg    "&#176;" ><!-- degree sign, U+00B0 ISOnum -->
-<!ENTITY plusmn "&#177;" ><!-- plus-minus sign = plus-or-minus sign, U+00B1 ISOnum -->
-<!ENTITY sup2   "&#178;" ><!-- superscript two = superscript digit two = squared, U+00B2 ISOnum -->
-<!ENTITY sup3   "&#179;" ><!-- superscript three = superscript digit three = cubed, U+00B3 ISOnum -->
-<!ENTITY acute  "&#180;" ><!-- acute accent = spacing acute, U+00B4 ISOdia -->
-<!ENTITY micro  "&#181;" ><!-- micro sign, U+00B5 ISOnum -->
-<!ENTITY para   "&#182;" ><!-- pilcrow sign = paragraph sign, U+00B6 ISOnum -->
-<!ENTITY middot "&#183;" ><!-- middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum -->
-<!ENTITY cedil  "&#184;" ><!-- cedilla = spacing cedilla, U+00B8 ISOdia -->
-<!ENTITY sup1   "&#185;" ><!-- superscript one = superscript digit one, U+00B9 ISOnum -->
-<!ENTITY ordm   "&#186;" ><!-- masculine ordinal indicator, U+00BA ISOnum -->
-<!ENTITY raquo  "&#187;" ><!-- right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum -->
-<!ENTITY frac14 "&#188;" ><!-- vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum -->
-<!ENTITY frac12 "&#189;" ><!-- vulgar fraction one half = fraction one half, U+00BD ISOnum -->
-<!ENTITY frac34 "&#190;" ><!-- vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum -->
-<!ENTITY iquest "&#191;" ><!-- inverted question mark = turned question mark, U+00BF ISOnum -->
-<!ENTITY Agrave "&#192;" ><!-- latin capital A with grave = latin capital A grave, U+00C0 ISOlat1 -->
-<!ENTITY Aacute "&#193;" ><!-- latin capital A with acute, U+00C1 ISOlat1 -->
-<!ENTITY Acirc  "&#194;" ><!-- latin capital A with circumflex, U+00C2 ISOlat1 -->
-<!ENTITY Atilde "&#195;" ><!-- latin capital A with tilde, U+00C3 ISOlat1 -->
-<!ENTITY Auml   "&#196;" ><!-- latin capital A with diaeresis, U+00C4 ISOlat1 -->
-<!ENTITY Aring  "&#197;" ><!-- latin capital A with ring above = latin capital A ring, U+00C5 ISOlat1 -->
-<!ENTITY AElig  "&#198;" ><!-- latin capital AE = latin capital ligature AE, U+00C6 ISOlat1 -->
-<!ENTITY Ccedil "&#199;" ><!-- latin capital C with cedilla, U+00C7 ISOlat1 -->
-<!ENTITY Egrave "&#200;" ><!-- latin capital E with grave, U+00C8 ISOlat1 -->
-<!ENTITY Eacute "&#201;" ><!-- latin capital E with acute, U+00C9 ISOlat1 -->
-<!ENTITY Ecirc  "&#202;" ><!-- latin capital E with circumflex, U+00CA ISOlat1 -->
-<!ENTITY Euml   "&#203;" ><!-- latin capital E with diaeresis, U+00CB ISOlat1 -->
-<!ENTITY Igrave "&#204;" ><!-- latin capital I with grave, U+00CC ISOlat1 -->
-<!ENTITY Iacute "&#205;" ><!-- latin capital I with acute, U+00CD ISOlat1 -->
-<!ENTITY Icirc  "&#206;" ><!-- latin capital I with circumflex, U+00CE ISOlat1 -->
-<!ENTITY Iuml   "&#207;" ><!-- latin capital I with diaeresis, U+00CF ISOlat1 -->
-<!ENTITY ETH    "&#208;" ><!-- latin capital ETH, U+00D0 ISOlat1 -->
-<!ENTITY Ntilde "&#209;" ><!-- latin capital N with tilde, U+00D1 ISOlat1 -->
-<!ENTITY Ograve "&#210;" ><!-- latin capital O with grave, U+00D2 ISOlat1 -->
-<!ENTITY Oacute "&#211;" ><!-- latin capital O with acute, U+00D3 ISOlat1 -->
-<!ENTITY Ocirc  "&#212;" ><!-- latin capital O with circumflex, U+00D4 ISOlat1 -->
-<!ENTITY Otilde "&#213;" ><!-- latin capital O with tilde, U+00D5 ISOlat1 -->
-<!ENTITY Ouml   "&#214;" ><!-- latin capital O with diaeresis, U+00D6 ISOlat1 -->
-<!ENTITY times  "&#215;" ><!-- multiplication sign, U+00D7 ISOnum -->
-<!ENTITY Oslash "&#216;" ><!-- latin capital O with stroke = latin capital O slash, U+00D8 ISOlat1 -->
-<!ENTITY Ugrave "&#217;" ><!-- latin capital U with grave, U+00D9 ISOlat1 -->
-<!ENTITY Uacute "&#218;" ><!-- latin capital U with acute, U+00DA ISOlat1 -->
-<!ENTITY Ucirc  "&#219;" ><!-- latin capital U with circumflex, U+00DB ISOlat1 -->
-<!ENTITY Uuml   "&#220;" ><!-- latin capital U with diaeresis, U+00DC ISOlat1 -->
-<!ENTITY Yacute "&#221;" ><!-- latin capital Y with acute, U+00DD ISOlat1 -->
-<!ENTITY THORN  "&#222;" ><!-- latin capital THORN, U+00DE ISOlat1 -->
-<!ENTITY szlig  "&#223;" ><!-- latin small sharp s = ess-zed, U+00DF ISOlat1 -->
-<!ENTITY agrave "&#224;" ><!-- latin small a with grave = latin small a grave, U+00E0 ISOlat1 -->
-<!ENTITY aacute "&#225;" ><!-- latin small a with acute, U+00E1 ISOlat1 -->
-<!ENTITY acirc  "&#226;" ><!-- latin small a with circumflex, U+00E2 ISOlat1 -->
-<!ENTITY atilde "&#227;" ><!-- latin small a with tilde, U+00E3 ISOlat1 -->
-<!ENTITY auml   "&#228;" ><!-- latin small a with diaeresis, U+00E4 ISOlat1 -->
-<!ENTITY aring  "&#229;" ><!-- latin small a with ring above = latin small a ring, U+00E5 ISOlat1 -->
-<!ENTITY aelig  "&#230;" ><!-- latin small ae = latin small ligature ae, U+00E6 ISOlat1 -->
-<!ENTITY ccedil "&#231;" ><!-- latin small c with cedilla, U+00E7 ISOlat1 -->
-<!ENTITY egrave "&#232;" ><!-- latin small e with grave, U+00E8 ISOlat1 -->
-<!ENTITY eacute "&#233;" ><!-- latin small e with acute, U+00E9 ISOlat1 -->
-<!ENTITY ecirc  "&#234;" ><!-- latin small e with circumflex, U+00EA ISOlat1 -->
-<!ENTITY euml   "&#235;" ><!-- latin small e with diaeresis, U+00EB ISOlat1 -->
-<!ENTITY igrave "&#236;" ><!-- latin small i with grave, U+00EC ISOlat1 -->
-<!ENTITY iacute "&#237;" ><!-- latin small i with acute, U+00ED ISOlat1 -->
-<!ENTITY icirc  "&#238;" ><!-- latin small i with circumflex, U+00EE ISOlat1 -->
-<!ENTITY iuml   "&#239;" ><!-- latin small i with diaeresis, U+00EF ISOlat1 -->
-<!ENTITY eth    "&#240;" ><!-- latin small eth, U+00F0 ISOlat1 -->
-<!ENTITY ntilde "&#241;" ><!-- latin small n with tilde, U+00F1 ISOlat1 -->
-<!ENTITY ograve "&#242;" ><!-- latin small o with grave, U+00F2 ISOlat1 -->
-<!ENTITY oacute "&#243;" ><!-- latin small o with acute, U+00F3 ISOlat1 -->
-<!ENTITY ocirc  "&#244;" ><!-- latin small o with circumflex, U+00F4 ISOlat1 -->
-<!ENTITY otilde "&#245;" ><!-- latin small o with tilde, U+00F5 ISOlat1 -->
-<!ENTITY ouml   "&#246;" ><!-- latin small o with diaeresis, U+00F6 ISOlat1 -->
-<!ENTITY divide "&#247;" ><!-- division sign, U+00F7 ISOnum -->
-<!ENTITY oslash "&#248;" ><!-- latin small o with stroke, = latin small o slash, U+00F8 ISOlat1 -->
-<!ENTITY ugrave "&#249;" ><!-- latin small u with grave, U+00F9 ISOlat1 -->
-<!ENTITY uacute "&#250;" ><!-- latin small u with acute, U+00FA ISOlat1 -->
-<!ENTITY ucirc  "&#251;" ><!-- latin small u with circumflex, U+00FB ISOlat1 -->
-<!ENTITY uuml   "&#252;" ><!-- latin small u with diaeresis, U+00FC ISOlat1 -->
-<!ENTITY yacute "&#253;" ><!-- latin small y with acute, U+00FD ISOlat1 -->
-<!ENTITY thorn  "&#254;" ><!-- latin small thorn with, U+00FE ISOlat1 -->
-<!ENTITY yuml   "&#255;" ><!-- latin small y with diaeresis, U+00FF ISOlat1 -->
-<!-- end of xhtml-lat1.ent -->
-
-F.1.2. XHTML Special Characters
-
-You can download this version of this file from http://www.w3.org/TR/2010/REC-xhtml-modularization/DTD/xhtml-special.ent. The latest version is available at http://www.w3.org/MarkUp/DTD/xhtml-special.ent.
-
-<!-- ...................................................................... -->
-<!-- XML-compatible ISO Special Character Entity Set for XHTML ............ -->
-<!-- file: xhtml-special.ent
-
-     Typical invocation:
-
-       <!ENTITY % xhtml-special
-           PUBLIC "-//W3C//ENTITIES Special for XHTML//EN"
-                  "xhtml-special.ent" >
-       %xhtml-special;
-
-     This DTD module is identified by the PUBLIC and SYSTEM identifiers:
-
-       PUBLIC "-//W3C//ENTITIES Special for XHTML//EN"
-       SYSTEM "http://www.w3.org/MarkUp/DTD/xhtml-special.ent"
-
-     Revision:  $Id: xhtml-special.ent,v 4.1 2001/04/10 09:34:14 altheim Exp $ SMI
-
-     Portions (C) International Organization for Standardization 1986:
-     Permission to copy in any form is granted for use with conforming
-     SGML systems and applications as defined in ISO 8879, provided
-     this notice is included in all copies.
-
-     Revisions:
-2000-10-28: added &apos; and altered XML Predefined Entities for compatibility
-->
-
-<!-- Relevant ISO entity set is given unless names are newly introduced.
-     New names (i.e., not in ISO 8879 [SGML] list) do not clash with
-     any existing ISO 8879 entity names. ISO 10646 [ISO10646] character
-     numbers are given for each character, in hex. Entity values are
-     decimal conversions of the ISO 10646 values and refer to the
-     document character set. Names are Unicode [UNICODE] names.
-->
-
-<!-- C0 Controls and Basic Latin -->
-<!ENTITY lt      "&#38;#60;" ><!-- less-than sign, U+003C ISOnum -->
-<!ENTITY gt      "&#62;" ><!-- greater-than sign, U+003E ISOnum -->
-<!ENTITY amp     "&#38;#38;" ><!-- ampersand, U+0026 ISOnum -->
-<!ENTITY apos    "&#39;" ><!-- The Apostrophe (Apostrophe Quote, APL Quote), U+0027 ISOnum -->
-<!ENTITY quot    "&#34;" ><!-- quotation mark (Quote Double), U+0022 ISOnum -->
-
-<!-- Latin Extended-A -->
-<!ENTITY OElig   "&#338;" ><!-- latin capital ligature OE, U+0152 ISOlat2 -->
-<!ENTITY oelig   "&#339;" ><!-- latin small ligature oe, U+0153 ISOlat2 -->
-
-<!-- ligature is a misnomer, this is a separate character in some languages -->
-<!ENTITY Scaron  "&#352;" ><!-- latin capital letter S with caron, U+0160 ISOlat2 -->
-<!ENTITY scaron  "&#353;" ><!-- latin small letter s with caron, U+0161 ISOlat2 -->
-<!ENTITY Yuml    "&#376;" ><!-- latin capital letter Y with diaeresis, U+0178 ISOlat2 -->
-
-<!-- Spacing Modifier Letters -->
-<!ENTITY circ    "&#710;" ><!-- modifier letter circumflex accent, U+02C6 ISOpub -->
-<!ENTITY tilde   "&#732;" ><!-- small tilde, U+02DC ISOdia -->
-
-<!-- General Punctuation -->
-<!ENTITY ensp    "&#8194;" ><!-- en space, U+2002 ISOpub -->
-<!ENTITY emsp    "&#8195;" ><!-- em space, U+2003 ISOpub -->
-<!ENTITY thinsp  "&#8201;" ><!-- thin space, U+2009 ISOpub -->
-<!ENTITY zwnj    "&#8204;" ><!-- zero width non-joiner, U+200C NEW RFC 2070 -->
-<!ENTITY zwj     "&#8205;" ><!-- zero width joiner, U+200D NEW RFC 2070 -->
-<!ENTITY lrm     "&#8206;" ><!-- left-to-right mark, U+200E NEW RFC 2070 -->
-<!ENTITY rlm     "&#8207;" ><!-- right-to-left mark, U+200F NEW RFC 2070 -->
-<!ENTITY ndash   "&#8211;" ><!-- en dash, U+2013 ISOpub -->
-<!ENTITY mdash   "&#8212;" ><!-- em dash, U+2014 ISOpub -->
-<!ENTITY lsquo   "&#8216;" ><!-- left single quotation mark, U+2018 ISOnum -->
-<!ENTITY rsquo   "&#8217;" ><!-- right single quotation mark, U+2019 ISOnum -->
-<!ENTITY sbquo   "&#8218;" ><!-- single low-9 quotation mark, U+201A NEW -->
-<!ENTITY ldquo   "&#8220;" ><!-- left double quotation mark, U+201C ISOnum -->
-<!ENTITY rdquo   "&#8221;" ><!-- right double quotation mark, U+201D ISOnum -->
-<!ENTITY bdquo   "&#8222;" ><!-- double low-9 quotation mark, U+201E NEW -->
-<!ENTITY dagger  "&#8224;" ><!-- dagger, U+2020 ISOpub -->
-<!ENTITY Dagger  "&#8225;" ><!-- double dagger, U+2021 ISOpub -->
-<!ENTITY permil  "&#8240;" ><!-- per mille sign, U+2030 ISOtech -->
-
-<!-- lsaquo is proposed but not yet ISO standardized -->
-<!ENTITY lsaquo  "&#8249;" ><!-- single left-pointing angle quotation mark, U+2039 ISO proposed -->
-<!-- rsaquo is proposed but not yet ISO standardized -->
-<!ENTITY rsaquo  "&#8250;" ><!-- single right-pointing angle quotation mark, U+203A ISO proposed -->
-<!ENTITY euro    "&#8364;" ><!-- euro sign, U+20AC NEW -->
-
-<!-- end of xhtml-special.ent -->
-
-F.1.3. XHTML Mathematical, Greek, and Symbolic Characters
-
-You can download this version of this file from http://www.w3.org/TR/2010/REC-xhtml-modularization/DTD/xhtml-symbol.ent. The latest version is available at http://www.w3.org/MarkUp/DTD/xhtml-symbol.ent.
-
-<!-- ...................................................................... -->
-<!-- ISO Math, Greek and Symbolic Character Entity Set for XHTML .......... -->
-<!-- file: xhtml-symbol.ent
-
-     Typical invocation:
-
-       <!ENTITY % xhtml-symbol
-           PUBLIC "-//W3C//ENTITIES Symbols for XHTML//EN"
-                  "xhtml-symbol.ent" >
-       %xhtml-symbol;
-
-     This DTD module is identified by the PUBLIC and SYSTEM identifiers:
-
-       PUBLIC "-//W3C//ENTITIES Symbols for XHTML//EN"
-       SYSTEM "http://www.w3.org/MarkUp/DTD/xhtml-symbol.ent"
-
-     Revision:  $Id: xhtml-symbol.ent,v 4.1 2001/04/10 09:34:14 altheim Exp $ SMI
-
-     Portions (C) International Organization for Standardization 1986:
-     Permission to copy in any form is granted for use with conforming
-     SGML systems and applications as defined in ISO 8879, provided
-     this notice is included in all copies.
-->
-
-<!-- Relevant ISO entity set is given unless names are newly introduced.
-     New names (i.e., not in ISO 8879 [SGML] list) do not clash with
-     any existing ISO 8879 entity names. ISO 10646 [ISO10646] character
-     numbers are given for each character, in hex. Entity values are
-     decimal conversions of the ISO 10646 values and refer to the
-     document character set. Names are Unicode [UNICODE] names.
-->
-
-<!-- Latin Extended-B -->
-<!ENTITY fnof     "&#402;" ><!-- latin small f with hook = function
-                              = florin, U+0192 ISOtech -->
-
-<!-- Greek -->
-<!ENTITY Alpha    "&#913;" ><!-- greek capital letter alpha, U+0391 -->
-<!ENTITY Beta     "&#914;" ><!-- greek capital letter beta, U+0392 -->
-<!ENTITY Gamma    "&#915;" ><!-- greek capital letter gamma, U+0393 ISOgrk3 -->
-<!ENTITY Delta    "&#916;" ><!-- greek capital letter delta, U+0394 ISOgrk3 -->
-<!ENTITY Epsilon  "&#917;" ><!-- greek capital letter epsilon, U+0395 -->
-<!ENTITY Zeta     "&#918;" ><!-- greek capital letter zeta, U+0396 -->
-<!ENTITY Eta      "&#919;" ><!-- greek capital letter eta, U+0397 -->
-<!ENTITY Theta    "&#920;" ><!-- greek capital letter theta, U+0398 ISOgrk3 -->
-<!ENTITY Iota     "&#921;" ><!-- greek capital letter iota, U+0399 -->
-<!ENTITY Kappa    "&#922;" ><!-- greek capital letter kappa, U+039A -->
-<!ENTITY Lambda   "&#923;" ><!-- greek capital letter lambda, U+039B ISOgrk3 -->
-<!ENTITY Mu       "&#924;" ><!-- greek capital letter mu, U+039C -->
-<!ENTITY Nu       "&#925;" ><!-- greek capital letter nu, U+039D -->
-<!ENTITY Xi       "&#926;" ><!-- greek capital letter xi, U+039E ISOgrk3 -->
-<!ENTITY Omicron  "&#927;" ><!-- greek capital letter omicron, U+039F -->
-<!ENTITY Pi       "&#928;" ><!-- greek capital letter pi, U+03A0 ISOgrk3 -->
-<!ENTITY Rho      "&#929;" ><!-- greek capital letter rho, U+03A1 -->
-<!-- there is no Sigmaf, and no U+03A2 character either -->
-<!ENTITY Sigma    "&#931;" ><!-- greek capital letter sigma, U+03A3 ISOgrk3 -->
-<!ENTITY Tau      "&#932;" ><!-- greek capital letter tau, U+03A4 -->
-<!ENTITY Upsilon  "&#933;" ><!-- greek capital letter upsilon,
-                              U+03A5 ISOgrk3 -->
-<!ENTITY Phi      "&#934;" ><!-- greek capital letter phi, U+03A6 ISOgrk3 -->
-<!ENTITY Chi      "&#935;" ><!-- greek capital letter chi, U+03A7 -->
-<!ENTITY Psi      "&#936;" ><!-- greek capital letter psi, U+03A8 ISOgrk3 -->
-<!ENTITY Omega    "&#937;" ><!-- greek capital letter omega, U+03A9 ISOgrk3 -->
-<!ENTITY alpha    "&#945;" ><!-- greek small letter alpha, U+03B1 ISOgrk3 -->
-<!ENTITY beta     "&#946;" ><!-- greek small letter beta, U+03B2 ISOgrk3 -->
-<!ENTITY gamma    "&#947;" ><!-- greek small letter gamma, U+03B3 ISOgrk3 -->
-<!ENTITY delta    "&#948;" ><!-- greek small letter delta, U+03B4 ISOgrk3 -->
-<!ENTITY epsilon  "&#949;" ><!-- greek small letter epsilon, U+03B5 ISOgrk3 -->
-<!ENTITY zeta     "&#950;" ><!-- greek small letter zeta, U+03B6 ISOgrk3 -->
-<!ENTITY eta      "&#951;" ><!-- greek small letter eta, U+03B7 ISOgrk3 -->
-<!ENTITY theta    "&#952;" ><!-- greek small letter theta, U+03B8 ISOgrk3 -->
-<!ENTITY iota     "&#953;" ><!-- greek small letter iota, U+03B9 ISOgrk3 -->
-<!ENTITY kappa    "&#954;" ><!-- greek small letter kappa, U+03BA ISOgrk3 -->
-<!ENTITY lambda   "&#955;" ><!-- greek small letter lambda, U+03BB ISOgrk3 -->
-<!ENTITY mu       "&#956;" ><!-- greek small letter mu, U+03BC ISOgrk3 -->
-<!ENTITY nu       "&#957;" ><!-- greek small letter nu, U+03BD ISOgrk3 -->
-<!ENTITY xi       "&#958;" ><!-- greek small letter xi, U+03BE ISOgrk3 -->
-<!ENTITY omicron  "&#959;" ><!-- greek small letter omicron, U+03BF NEW -->
-<!ENTITY pi       "&#960;" ><!-- greek small letter pi, U+03C0 ISOgrk3 -->
-<!ENTITY rho      "&#961;" ><!-- greek small letter rho, U+03C1 ISOgrk3 -->
-<!ENTITY sigmaf   "&#962;" ><!-- greek small letter final sigma, U+03C2 ISOgrk3 -->
-<!ENTITY sigma    "&#963;" ><!-- greek small letter sigma, U+03C3 ISOgrk3 -->
-<!ENTITY tau      "&#964;" ><!-- greek small letter tau, U+03C4 ISOgrk3 -->
-<!ENTITY upsilon  "&#965;" ><!-- greek small letter upsilon, U+03C5 ISOgrk3 -->
-<!ENTITY phi      "&#966;" ><!-- greek small letter phi, U+03C6 ISOgrk3 -->
-<!ENTITY chi      "&#967;" ><!-- greek small letter chi, U+03C7 ISOgrk3 -->
-<!ENTITY psi      "&#968;" ><!-- greek small letter psi, U+03C8 ISOgrk3 -->
-<!ENTITY omega    "&#969;" ><!-- greek small letter omega, U+03C9 ISOgrk3 -->
-<!ENTITY thetasym "&#977;" ><!-- greek small letter theta symbol, U+03D1 NEW -->
-<!ENTITY upsih    "&#978;" ><!-- greek upsilon with hook symbol, U+03D2 NEW -->
-<!ENTITY piv      "&#982;" ><!-- greek pi symbol, U+03D6 ISOgrk3 -->
-
-<!-- General Punctuation -->
-<!ENTITY bull     "&#8226;" ><!-- bullet = black small circle, U+2022 ISOpub  -->
-<!-- bullet is NOT the same as bullet operator, U+2219 -->
-<!ENTITY hellip   "&#8230;" ><!-- horizontal ellipsis = three dot leader, U+2026 ISOpub  -->
-<!ENTITY prime    "&#8242;" ><!-- prime = minutes = feet, U+2032 ISOtech -->
-<!ENTITY Prime    "&#8243;" ><!-- double prime = seconds = inches, U+2033 ISOtech -->
-<!ENTITY oline    "&#8254;" ><!-- overline = spacing overscore, U+203E NEW -->
-<!ENTITY frasl    "&#8260;" ><!-- fraction slash, U+2044 NEW -->
-
-<!-- Letterlike Symbols -->
-<!ENTITY weierp   "&#8472;" ><!-- script capital P = power set = Weierstrass p, U+2118 ISOamso -->
-<!ENTITY image    "&#8465;" ><!-- blackletter capital I = imaginary part, U+2111 ISOamso -->
-<!ENTITY real     "&#8476;" ><!-- blackletter capital R = real part symbol, U+211C ISOamso -->
-<!ENTITY trade    "&#8482;" ><!-- trade mark sign, U+2122 ISOnum -->
-<!ENTITY alefsym  "&#8501;" ><!-- alef symbol = first transfinite cardinal, U+2135 NEW -->
-<!-- alef symbol is NOT the same as hebrew letter alef, U+05D0 although
-     the same glyph could be used to depict both characters -->
-
-<!-- Arrows -->
-<!ENTITY larr     "&#8592;" ><!-- leftwards arrow, U+2190 ISOnum -->
-<!ENTITY uarr     "&#8593;" ><!-- upwards arrow, U+2191 ISOnum-->
-<!ENTITY rarr     "&#8594;" ><!-- rightwards arrow, U+2192 ISOnum -->
-<!ENTITY darr     "&#8595;" ><!-- downwards arrow, U+2193 ISOnum -->
-<!ENTITY harr     "&#8596;" ><!-- left right arrow, U+2194 ISOamsa -->
-<!ENTITY crarr    "&#8629;" ><!-- downwards arrow with corner leftwards
-                               = carriage return, U+21B5 NEW -->
-<!ENTITY lArr     "&#8656;" ><!-- leftwards double arrow, U+21D0 ISOtech -->
-<!-- Unicode does not say that lArr is the same as the 'is implied by' arrow
-    but also does not have any other character for that function. So ? lArr can
-    be used for 'is implied by' as ISOtech suggests -->
-<!ENTITY uArr     "&#8657;" ><!-- upwards double arrow, U+21D1 ISOamsa -->
-<!ENTITY rArr     "&#8658;" ><!-- rightwards double arrow, U+21D2 ISOtech -->
-<!-- Unicode does not say this is the 'implies' character but does not have
-     another character with this function so ?
-     rArr can be used for 'implies' as ISOtech suggests -->
-<!ENTITY dArr     "&#8659;" ><!-- downwards double arrow, U+21D3 ISOamsa -->
-<!ENTITY hArr     "&#8660;" ><!-- left right double arrow, U+21D4 ISOamsa -->
-
-<!-- Mathematical Operators -->
-<!ENTITY forall   "&#8704;" ><!-- for all, U+2200 ISOtech -->
-<!ENTITY part     "&#8706;" ><!-- partial differential, U+2202 ISOtech  -->
-<!ENTITY exist    "&#8707;" ><!-- there exists, U+2203 ISOtech -->
-<!ENTITY empty    "&#8709;" ><!-- empty set = null set, U+2205 ISOamso -->
-<!ENTITY nabla    "&#8711;" ><!-- nabla = backward difference, U+2207 ISOtech -->
-<!ENTITY isin     "&#8712;" ><!-- element of, U+2208 ISOtech -->
-<!ENTITY notin    "&#8713;" ><!-- not an element of, U+2209 ISOtech -->
-<!ENTITY ni       "&#8715;" ><!-- contains as member, U+220B ISOtech -->
-<!-- should there be a more memorable name than 'ni'? -->
-<!ENTITY prod     "&#8719;" ><!-- n-ary product = product sign, U+220F ISOamsb -->
-<!-- prod is NOT the same character as U+03A0 'greek capital letter pi' though
-     the same glyph might be used for both -->
-<!ENTITY sum      "&#8721;" ><!-- n-ary sumation, U+2211 ISOamsb -->
-<!-- sum is NOT the same character as U+03A3 'greek capital letter sigma'
-     though the same glyph might be used for both -->
-<!ENTITY minus    "&#8722;" ><!-- minus sign, U+2212 ISOtech -->
-<!ENTITY lowast   "&#8727;" ><!-- asterisk operator, U+2217 ISOtech -->
-<!ENTITY radic    "&#8730;" ><!-- square root = radical sign, U+221A ISOtech -->
-<!ENTITY prop     "&#8733;" ><!-- proportional to, U+221D ISOtech -->
-<!ENTITY infin    "&#8734;" ><!-- infinity, U+221E ISOtech -->
-<!ENTITY ang      "&#8736;" ><!-- angle, U+2220 ISOamso -->
-<!ENTITY and      "&#8743;" ><!-- logical and = wedge, U+2227 ISOtech -->
-<!ENTITY or       "&#8744;" ><!-- logical or = vee, U+2228 ISOtech -->
-<!ENTITY cap      "&#8745;" ><!-- intersection = cap, U+2229 ISOtech -->
-<!ENTITY cup      "&#8746;" ><!-- union = cup, U+222A ISOtech -->
-<!ENTITY int      "&#8747;" ><!-- integral, U+222B ISOtech -->
-<!ENTITY there4   "&#8756;" ><!-- therefore, U+2234 ISOtech -->
-<!ENTITY sim      "&#8764;" ><!-- tilde operator = varies with = similar to, U+223C ISOtech -->
-<!-- tilde operator is NOT the same character as the tilde, U+007E,
-     although the same glyph might be used to represent both  -->
-<!ENTITY cong     "&#8773;" ><!-- approximately equal to, U+2245 ISOtech -->
-<!ENTITY asymp    "&#8776;" ><!-- almost equal to = asymptotic to, U+2248 ISOamsr -->
-<!ENTITY ne       "&#8800;" ><!-- not equal to, U+2260 ISOtech -->
-<!ENTITY equiv    "&#8801;" ><!-- identical to, U+2261 ISOtech -->
-<!ENTITY le       "&#8804;" ><!-- less-than or equal to, U+2264 ISOtech -->
-<!ENTITY ge       "&#8805;" ><!-- greater-than or equal to, U+2265 ISOtech -->
-<!ENTITY sub      "&#8834;" ><!-- subset of, U+2282 ISOtech -->
-<!ENTITY sup      "&#8835;" ><!-- superset of, U+2283 ISOtech -->
-<!-- note that nsup, 'not a superset of, U+2283' is not covered by the Symbol
-     font encoding and is not included. Should it be, for symmetry?
-     It is in ISOamsn  -->
-<!ENTITY nsub     "&#8836;" ><!-- not a subset of, U+2284 ISOamsn -->
-<!ENTITY sube     "&#8838;" ><!-- subset of or equal to, U+2286 ISOtech -->
-<!ENTITY supe     "&#8839;" ><!-- superset of or equal to, U+2287 ISOtech -->
-<!ENTITY oplus    "&#8853;" ><!-- circled plus = direct sum, U+2295 ISOamsb -->
-<!ENTITY otimes   "&#8855;" ><!-- circled times = vector product, U+2297 ISOamsb -->
-<!ENTITY perp     "&#8869;" ><!-- up tack = orthogonal to = perpendicular, U+22A5 ISOtech -->
-<!ENTITY sdot     "&#8901;" ><!-- dot operator, U+22C5 ISOamsb -->
-<!-- dot operator is NOT the same character as U+00B7 middle dot -->
-
-<!-- Miscellaneous Technical -->
-<!ENTITY lceil    "&#8968;" ><!-- left ceiling = apl upstile, U+2308 ISOamsc  -->
-<!ENTITY rceil    "&#8969;" ><!-- right ceiling, U+2309 ISOamsc  -->
-<!ENTITY lfloor   "&#8970;" ><!-- left floor = apl downstile, U+230A ISOamsc  -->
-<!ENTITY rfloor   "&#8971;" ><!-- right floor, U+230B ISOamsc  -->
-<!ENTITY lang     "&#9001;" ><!-- left-pointing angle bracket = bra, U+2329 ISOtech -->
-<!-- lang is NOT the same character as U+003C 'less than'
-     or U+2039 'single left-pointing angle quotation mark' -->
-<!ENTITY rang     "&#9002;" ><!-- right-pointing angle bracket = ket, U+232A ISOtech -->
-<!-- rang is NOT the same character as U+003E 'greater than'
-     or U+203A 'single right-pointing angle quotation mark' -->
-
-<!-- Geometric Shapes -->
-<!ENTITY loz      "&#9674;" ><!-- lozenge, U+25CA ISOpub -->
-
-<!-- Miscellaneous Symbols -->
-<!ENTITY spades   "&#9824;" ><!-- black spade suit, U+2660 ISOpub -->
-<!-- black here seems to mean filled as opposed to hollow -->
-<!ENTITY clubs    "&#9827;" ><!-- black club suit = shamrock, U+2663 ISOpub -->
-<!ENTITY hearts   "&#9829;" ><!-- black heart suit = valentine, U+2665 ISOpub -->
-<!ENTITY diams    "&#9830;" ><!-- black diamond suit, U+2666 ISOpub -->
-
-<!-- end of xhtml-symbol.ent -->
-"""
-  return text
-
-def get_apache_license():
-  license = r"""/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-"""
-  return license
-
-main()
--- a/fine-lucene/src/main/java/com/fr/third/org/apache/lucene/util/automaton/UTF32ToUTF8.py
+++ b/fine-lucene/src/main/java/com/fr/third/org/apache/lucene/util/automaton/UTF32ToUTF8.py
@ -1,366 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import types
-import os
-import sys
-import random
-
-MAX_UNICODE = 0x10FFFF
-
-# TODO
-#   - could be more minimal
-#     - eg when bracket lands on a utf8 boundary, like 3 - 2047 -- they can share the two * edges
-#     - also 3 2048 or 3 65536 -- it should not have an * down the red path, but it does
-
-# MASKS[0] is bottom 1-bit
-# MASKS[1] is bottom 2-bits
-# ...
-
-# Inclusive (first, last) code point of each UTF-8 length class; index 0 is
-# the 1 byte range, index 3 the 4 byte range.
-utf8Ranges = [(0, 127),
-              (128, 2047),
-              (2048, 65535),
-              (65536, 1114111)]
-
-# Edge colors used by FSA.toDOT, keyed by edge label; any other label is
-# drawn in black.
-typeToColor = {'startend': 'purple',
-               'start': 'blue',
-               'end': 'red'}
-
-class FSA:
-  """
-  Minimal finite state automaton whose edges are labeled with UTF-8 byte
-  ranges.
-
-  Callers must set the ``start`` and ``end`` attributes to node ids returned
-  by addNode() before calling run(); __init__ does not create them.
-  """
-
-  def __init__(self):
-    # maps node id -> [nodeLabel, (edgeLabel, startByte, endByte, destNode), ...]
-    self.states = {}
-    self.nodeUpto = 0
-
-  def run(self, bytes):
-    """
-    Feeds the byte values through the automaton, starting at self.start.
-
-    Returns the node reached after the last byte, or -1 as soon as a byte
-    has no matching edge.  Raises RuntimeError if more than one edge of a
-    node matches the same byte.
-    """
-    state = self.start
-    for b in bytes:
-      found = False
-      oldState = state
-      for label, s, e, n in self.states[state][1:]:
-        if b >= s and b <= e:
-          if found:
-            raise RuntimeError('state %s has ambiguous output for byte %s' % (oldState, b))
-          state = n
-          found = True
-      if not found:
-        return -1
-
-    return state
-
-  def addEdge(self, n1, n2, v1, v2, label):
-    """
-    Adds edge from n1-n2, utf8 byte range v1-v2.
-    """
-    assert n1 in self.states
-    assert isinstance(v1, int)
-    assert isinstance(v2, int)
-    self.states[n1].append((label, v1, v2, n2))
-
-  def addNode(self, label=None):
-    """
-    Creates a new node carrying the given label and returns its id.
-    """
-    node = self.nodeUpto
-    self.states[node] = [label]
-    self.nodeUpto += 1
-    return node
-
-  def toDOT(self, label):
-    """
-    Returns the automaton as Graphviz DOT source for a digraph named label.
-    """
-    lines = []
-    w = lines.append
-
-    # locate the nodes that were labeled 'start' and 'end' by the caller
-    endNode = startNode = None
-    for nodeId, details in self.states.items():
-      name = details[0]
-      if name == 'end':
-        endNode = nodeId
-      elif name == 'start':
-        startNode = nodeId
-
-    w('digraph %s {' % label)
-    w('  rankdir=LR;')
-    w('  size="8,5";')
-    w('  node [color=white label=""]; Ns;')
-
-    w('  node [color=black];')
-    w('  node [shape=doublecircle, label=""]; N%s [label="%s"];' % (endNode, endNode))
-    w('  node [shape=circle];')
-
-    w('  N%s [label="%s"];' % (startNode, startNode))
-    w('  Ns -> N%s;' % startNode)
-    for nodeId, details in self.states.items():
-      edges = details[1:]
-      w('  N%s [label="%s"];' % (nodeId, nodeId))
-      for edgeType, s, e, dest in edges:
-        c = typeToColor.get(edgeType, 'black')
-        if edgeType == 'all*':
-          # special case -- matches any utf8 byte at this point
-          edgeLabel = '*'
-        elif s == e:
-          edgeLabel = '%s' % binary(s)
-        else:
-          edgeLabel = '%s-%s' % (binary(s), binary(e))
-        w('  N%s -> N%s [label="%s" color="%s"];' % (nodeId, dest, edgeLabel, c))
-    w('}')
-    return '\n'.join(lines)
-
-  def toPNG(self, label, pngOut):
-    """
-    Writes the DOT source to tmp.dot in the current directory and renders
-    it to pngOut with the external ``dot`` command.
-
-    Raises RuntimeError if ``dot`` exits with a non-zero status.
-    """
-    f = open('tmp.dot', 'wb')
-    try:
-      f.write(self.toDOT(label))
-    finally:
-      # close explicitly so dot sees the complete file
-      f.close()
-    if os.system('dot -Tpng tmp.dot -o %s' % pngOut):
-      raise RuntimeError('dot failed')
-    
-
-# MASKS[i] has the low i+1 bits set, eg MASKS[0] == 1 and MASKS[5] == 63.
-MASKS = []
-v = 2
-for i in range(32):
-  MASKS.append(v-1)
-  v *= 2
-
-def binary(x):
-  """
-  Renders a non-negative int as space separated groups of 8 binary digits,
-  most significant byte first (eg 5 -> '00000101',
-  511 -> '00000001 11111111').
-
-  Zero renders as one all-zero byte.  Negative values are not supported and
-  yield an empty string.
-  """
-  if x == 0:
-    return '00000000'
-
-  # split into byte values, least significant first
-  byteValues = []
-  while x > 0:
-    byteValues.append(x & 0xFF)
-    x = x >> 8
-
-  # big endian!
-  byteValues.reverse()
-
-  groups = []
-  for b in byteValues:
-    bits = ['1' if (b >> shift) & 1 else '0' for shift in range(7, -1, -1)]
-    groups.append(''.join(bits))
-
-  return ' '.join(groups)
-
-def getUTF8Rest(code, numBytes):
-  """
-  Returns the last numBytes continuation bytes (10xxxxxx) of code as a tuple
-  of (byteValue, 6) pairs, most significant byte first.
-  """
-  continuation = []
-  remaining = code
-  while len(continuation) < numBytes:
-    # every continuation byte carries the next 6 low bits of the code point
-    continuation.insert(0, (128 | (remaining & MASKS[5]), 6))
-    remaining >>= 6
-  return tuple(continuation)
-
-def toUTF8(code):
-  """
-  Encodes a Unicode code point as UTF-8.
-
-  Returns a tuple with one (byteValue, payloadBits) pair per encoded byte,
-  where payloadBits is the number of code point bits stored in that byte
-  (7 for ASCII, 5/4/3 for the lead byte of 2/3/4 byte sequences, 6 for
-  continuation bytes).  Asserts that 0 <= code <= MAX_UNICODE.
-  """
-  # code = Unicode code point
-  assert code >= 0
-  assert code <= MAX_UNICODE
-
-  if code < 128:
-    # 0xxxxxxx
-    bytes = ((code, 7),)
-  elif code < 2048:
-    # 110yyyxx 10xxxxxx
-    byte1 = (6 << 5) | (code >> 6)
-    bytes = ((byte1, 5),) + getUTF8Rest(code, 1)
-  elif code < 65536:
-    # 1110yyyy 10yyyyxx 10xxxxxx
-    byte1 = (14 << 4) | (code >> 12)
-    bytes = ((byte1, 4),) + getUTF8Rest(code, 2)
-  else:
-    # 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
-    byte1 = (30 << 3) | (code >> 18)
-    bytes = ((byte1, 3),) + getUTF8Rest(code, 3)
-
-  return bytes
-
-def all(fsa, startNode, endNode, startCode, endCode, left):
-  """
-  Adds a path from startNode to endNode that accepts any first byte in
-  startCode-endCode followed by one arbitrary continuation byte (128-191)
-  for every entry in left.
-  """
-  if not left:
-    fsa.addEdge(startNode, endNode, startCode, endCode, 'all')
-    return
-
-  prev = fsa.addNode()
-  fsa.addEdge(startNode, prev, startCode, endCode, 'all')
-  # all but the last remaining byte need an intermediate node
-  for _ in left[1:]:
-    nxt = fsa.addNode()
-    fsa.addEdge(prev, nxt, 128, 191, 'all*')
-    prev = nxt
-  fsa.addEdge(prev, endNode, 128, 191, 'all*')
-          
-def start(fsa, startNode, endNode, utf8, doAll):
-  """
-  Adds the edges accepting every byte sequence >= utf8 that shares its
-  length.  When doAll is false the larger lead bytes are left to the caller.
-  """
-  lead, numBits = utf8[0]
-  # largest byte value with the same prefix bits as lead
-  top = lead | MASKS[numBits-1]
-
-  if len(utf8) == 1:
-    fsa.addEdge(startNode, endNode, lead, top, 'start')
-    return
-
-  n = fsa.addNode()
-  fsa.addEdge(startNode, n, lead, lead, 'start')
-  start(fsa, n, endNode, utf8[1:], True)
-  if doAll and lead != top:
-    all(fsa, startNode, endNode, lead+1, top, utf8[1:])
-
-def end(fsa, startNode, endNode, utf8, doAll):
-  """
-  Adds the edges accepting every byte sequence <= utf8 that shares its
-  length.  When doAll is false the smaller lead bytes are left to the
-  caller.
-  """
-  lead, numBits = utf8[0]
-
-  if len(utf8) == 1:
-    fsa.addEdge(startNode, endNode, lead & ~MASKS[numBits-1], lead, 'end')
-    return
-
-  if numBits == 5:
-    # special case -- avoid creating unused edges (utf8 doesn't accept certain byte sequences):
-    lowest = 194
-  else:
-    lowest = lead & (~MASKS[numBits-1])
-
-  if doAll and lead != lowest:
-    all(fsa, startNode, endNode, lowest, lead-1, utf8[1:])
-  n = fsa.addNode()
-  fsa.addEdge(startNode, n, lead, lead, 'end')
-  end(fsa, n, endNode, utf8[1:], True)
-
-def build(fsa,
-          startNode, endNode,
-          startUTF8, endUTF8):
-  """
-  Adds to fsa the edges between startNode and endNode that accept the UTF-8
-  encodings lying between startUTF8 and endUTF8 (both as returned by
-  toUTF8).
-  """
-  startLead = startUTF8[0][0]
-  endLead = endUTF8[0][0]
-  startLen = len(startUTF8)
-  endLen = len(endUTF8)
-
-  if startLead == endLead:
-    # Degen case: lead with the same byte:
-    if startLen == 1 and endLen == 1:
-      fsa.addEdge(startNode, endNode, startLead, endLead, 'startend')
-      return
-    assert startLen != 1
-    assert endLen != 1
-    n = fsa.addNode()
-    # single value edge, then recurse on the remaining bytes
-    fsa.addEdge(startNode, n, startLead, startLead, 'single')
-    build(fsa, n, endNode, startUTF8[1:], endUTF8[1:])
-    return
-
-  if startLen == endLen:
-    # same length, different lead bytes
-    if startLen == 1:
-      fsa.addEdge(startNode, endNode, startLead, endLead, 'startend')
-      return
-    start(fsa, startNode, endNode, startUTF8, False)
-    if endLead - startLead > 1:
-      all(fsa, startNode, endNode, startLead+1, endLead-1, startUTF8[1:])
-    end(fsa, startNode, endNode, endUTF8, False)
-    return
-
-  # Different lengths; break into start, middle, end:
-  start(fsa, startNode, endNode, startUTF8, True)
-
-  # possibly middle: whole length classes strictly between the two lengths
-  for byteCount in range(1+startLen, endLen):
-    first = toUTF8(utf8Ranges[byteCount-1][0])
-    last = toUTF8(utf8Ranges[byteCount-1][1])
-    all(fsa, startNode, endNode, first[0][0], last[0][0], first[1:])
-
-  end(fsa, startNode, endNode, endUTF8, True)
-
-def main():
-  """
-  Command line entry point: builds the automaton for the code point range
-  given in sys.argv, renders it to /tmp/outpy.png and self-tests it.
-  """
-  argCount = len(sys.argv)
-  if argCount != 3 and argCount != 4:
-    print
-    print 'Usage: python %s startUTF32 endUTF32 [testCode]' % sys.argv[0]
-    print
-    sys.exit(1)
-
-  utf32Start = int(sys.argv[1])
-  utf32End = int(sys.argv[2])
-
-  if utf32Start > utf32End:
-    print 'ERROR: start must be <= end'
-    sys.exit(1)
-
-  fsa = FSA()
-  fsa.start = fsa.addNode('start')
-  fsa.end = fsa.addNode('end')
-
-  startUTF8 = toUTF8(utf32Start)
-  print 's=%s' % ' '.join([binary(x[0]) for x in startUTF8])
-  endUTF8 = toUTF8(utf32End)
-  print 'e=%s' % ' '.join([binary(x[0]) for x in endUTF8])
-
-  if argCount == 4:
-    # optional third argument: show one extra code point in binary and hex
-    testUTF8 = toUTF8(int(sys.argv[3]))
-    asBinary = ' '.join([binary(x[0]) for x in testUTF8])
-    asHex = ' '.join(['%2x' % x[0] for x in testUTF8])
-    print 't=%s [%s]' % (asBinary, asHex)
-
-  build(fsa, fsa.start, fsa.end, startUTF8, endUTF8)
-
-  fsa.toPNG('test', '/tmp/outpy.png')
-  print 'Saved to /tmp/outpy.png...'
-
-  test(fsa, utf32Start, utf32End, 100000)
-
-def test(fsa, utf32Start, utf32End, count):
-  """
-  Randomized check of fsa against the code point range
-  utf32Start-utf32End (inclusive).
-
-  Draws count random code points inside the range, which must all end in
-  fsa.end, then count random code points outside of it, which must all be
-  rejected (-1).  Prints the first failure and returns False; returns True
-  when every sample behaves as expected.
-  """
-
-  # verify correct ints are accepted
-  for i in range(count):
-    r = random.randint(utf32Start, utf32End)
-    utf8 = toUTF8(r)
-    dest = fsa.run([tup[0] for tup in utf8])
-    if dest != fsa.end:
-      print 'FAILED: valid %s (%s) is not accepted' % (r, ' '.join([binary(x[0]) for x in utf8]))
-      return False
-
-  # number of code points in 0..MAX_UNICODE that lie outside the range
-  invalidCount = MAX_UNICODE + 1 - (utf32End - utf32Start + 1)
-  if invalidCount > 0:
-    # verify invalid ints are not accepted
-    for i in range(count):
-      r = random.randint(0, invalidCount-1)
-      if r >= utf32Start:
-        # skip over the accepted range so that r stays invalid
-        r = utf32End + 1 + r - utf32Start
-      utf8 = toUTF8(r)
-      dest = fsa.run([tup[0] for tup in utf8])
-      if dest != -1:
-        print 'FAILED: invalid %s (%s) is accepted' % (r, ' '.join([binary(x[0]) for x in utf8]))
-        return False
-
-  return True
-
-def stress():
-  """
-  Endless randomized stress test: builds an automaton for a random code
-  point range, checks it with test() and reports failing ranges.  Never
-  returns.
-  """
-
-  print 'Testing...'
-
-  iteration = 0
-  while True:
-    if iteration % 10 == 0:
-      print '%s...' % iteration
-    iteration += 1
-
-    # two random code points, ordered so that start <= end
-    utf32Start, utf32End = sorted([random.randint(0, MAX_UNICODE),
-                                   random.randint(0, MAX_UNICODE)])
-
-    fsa = FSA()
-    fsa.start = fsa.addNode('start')
-    fsa.end = fsa.addNode('end')
-    build(fsa, fsa.start, fsa.end,
-          toUTF8(utf32Start),
-          toUTF8(utf32End))
-
-    passed = test(fsa, utf32Start, utf32End, 10000)
-    if not passed:
-      print 'FAILED on utf32Start=%s utf32End=%s' % (utf32Start, utf32End)
-
-# With command line arguments build and render a single range (main);
-# without any, run the endless randomized stress test.
-if __name__ == '__main__':
-  if len(sys.argv) > 1:
-    main()
-  else:
-    stress()
--- a/fine-lucene/src/main/java/com/fr/third/org/apache/lucene/util/automaton/createLevAutomata.py
+++ b/fine-lucene/src/main/java/com/fr/third/org/apache/lucene/util/automaton/createLevAutomata.py
@ -1,500 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Note, this file is known to work with rev 120 of the moman
-# repository (http://bitbucket.org/jpbarrette/moman/overview)
-#
-# See also: http://sites.google.com/site/rrettesite/moman
-
-import math
-import os
-import sys
-#sys.path.insert(0, 'moman/finenight/python')
-sys.path.insert(0, '../../../../../../../../build/core/moman/finenight/python')
-try:
-  from possibleStates import genTransitions
-except ImportError:
-  from finenight.possibleStates import genTransitions
-
-# Code generation settings:
-#   MODE      - 'array' emits lookup tables, 'switch' emits switch statements
-#   PACKED    - in 'array' mode, pack the tables into long[] (see pack)
-#   WORD      - number of bits per packed word
-#   LOG2_WORD - log2(WORD), used to turn a bit position into a word index
-MODE = 'array'
-PACKED = True
-WORD = 64
-LOG2_WORD = int(math.log(WORD)/math.log(2))
-#MODE = 'switch'
-
-class LineOutput:
-  """
-  Collects lines of generated source and indents them by brace nesting.
-
-  Call the instance once per line; str() returns all lines joined with
-  newlines and asserts that every opened indent level was closed again.
-  """
-
-  def __init__(self, indent=''):
-    self.l = []
-    self._indent = self.startIndent = indent
-    self.inComment = False
-
-  def __call__(self, s, indent=0):
-    """
-    Appends line s at the current indent.
-
-    A line containing '}' is outdented by one level before it is written; a
-    line containing '{' indents the lines that follow.  A non-zero indent
-    adds that many extra levels for this line only.  Leading whitespace of s
-    is stripped except inside /* ... */ comments.
-    """
-    if s.find('}') != -1:
-      assert self._indent != self.startIndent
-      self._indent = self._indent[:-2]
-
-    if indent != 0:
-      # floor division: the nesting depth has to stay an int
-      indent0 = '  ' * (len(self._indent)//2+indent)
-    else:
-      indent0 = self._indent
-
-    if s.find('/*') != -1:
-      if s.find('*/') == -1:
-        self.inComment = True
-    elif s.find('*/') != -1:
-      # closing line of a comment: keep its leading whitespace too
-      self.inComment = True
-
-    if self.inComment:
-      self.l.append(indent0 + s)
-    else:
-      self.l.append(indent0 + s.lstrip())
-
-    self.inComment = self.inComment and s.find('*/') == -1
-
-    if s.find('{') != -1:
-      self._indent += '  '
-
-  def __str__(self):
-    assert self._indent == self.startIndent, 'indent %d vs start indent %d' % \
-           (len(self._indent), len(self.startIndent))
-    return '\n'.join(self.l)
-
-  def indent(self):
-    """Indents the following lines by one more level."""
-    self._indent += '  '
-
-  def outdent(self):
-    """Removes one indent level; asserts that one is open."""
-    assert self._indent != self.startIndent
-    self._indent = self._indent[:-2]
-    
-def charVarNumber(charVar):
-  """
-  Converts a sequence of binary digits, most significant first, to the
-  number it spells (eg [1, 0, 1] -> 5).
-  """
-
-  value = 0
-  for digit in charVar:
-    # shift what we have so far and append the next digit
-    value = value * 2 + int(digit)
-  return value
-
-def main():
-  """
-  Generates the Java source of a Lev<N>[T]ParametricDescription class.
-
-  Expects two command line arguments: the edit distance N and the string
-  True/False selecting whether transpositions count as primitive edits.  The
-  transition tables come from moman's genTransitions; the resulting .java
-  file is written to the current working directory.
-  """
-
-  if len(sys.argv) != 3:
-    print
-    print 'Usage: python -u %s N <True/False>' % sys.argv[0]
-    print
-    print 'NOTE: the resulting .java file is created in the current working dir!'
-    print
-    sys.exit(1)
-
-  n = int(sys.argv[1])
-
-  transpose = (sys.argv[2] == "True")
-
-  tables = genTransitions(n, transpose)
-
-  stateMap = {}
-
-  # init null state
-  stateMap['[]'] = -1
-
-  # init start state
-  stateMap['[(0, 0)]'] = 0
-
-  w = LineOutput()
-
-  w('package com.fr.third.org.apache.lucene.util.automaton;')
-  w('')
-  w('/*')
-  w(' * Licensed to the Apache Software Foundation (ASF) under one or more')
-  w(' * contributor license agreements.  See the NOTICE file distributed with')
-  w(' * this work for additional information regarding copyright ownership.')
-  w(' * The ASF licenses this file to You under the Apache License, Version 2.0')
-  w(' * (the "License"); you may not use this file except in compliance with')
-  w(' * the License.  You may obtain a copy of the License at')
-  w(' *')
-  w(' *     http://www.apache.org/licenses/LICENSE-2.0')
-  w(' *')
-  w(' * Unless required by applicable law or agreed to in writing, software')
-  w(' * distributed under the License is distributed on an "AS IS" BASIS,')
-  w(' * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.')
-  w(' * See the License for the specific language governing permissions and')
-  w(' * limitations under the License.')
-  w(' */')
-  w('')
-  w('// The following code was generated with the moman/finenight pkg')
-  w('// This package is available under the MIT License, see NOTICE.txt')
-  w('// for more details.')
-  w('')
-  w('import com.fr.third.org.apache.lucene.util.automaton.LevenshteinAutomata.ParametricDescription;')
-  w('')
-  if transpose:
-    w('/** Parametric description for generating a Levenshtein automaton of degree %s, ' % n)
-    w('    with transpositions as primitive edits */')
-    className = 'Lev%dTParametricDescription' % n
-  else:
-    w('/** Parametric description for generating a Levenshtein automaton of degree %s */' % n)
-    className = 'Lev%dParametricDescription' % n
-
-  w('class %s extends ParametricDescription {' % className)
-
-  w('')
-  w('@Override')
-  w('int transition(int absState, int position, int vector) {')
-
-  w('  // null absState should never be passed in')
-  w('  assert absState != -1;')
-
-  w('')
-  w('  // decode absState -> state, offset')
-  w('  int state = absState/(w+1);')
-  w('  int offset = absState%(w+1);')
-  w('  assert offset >= 0;')
-  w('')
-
-  machines = []
-
-  for i, map in enumerate(tables):
-    if i == 0:
-      w('if (position == w) {')
-    elif i == len(tables)-1:
-      w('} else {')
-    else:
-      w('} else if (position == w-%d) {' % i)
-
-    if i != 0 and MODE == 'switch':
-      w('switch(vector) {')
-
-    l = map.items()
-    l.sort()
-
-    numCasesPerVector = None
-    numVectors = len(l)
-
-    # always defined so that machines.append below also works in 'switch'
-    # mode, where the arrays simply stay empty
-    toStateArray = []
-    toOffsetIncrArray = []
-
-    for charVar, states in l:
-
-      # somehow it's a string:
-      # NOTE(review): eval is only acceptable because the tables are produced
-      # locally by moman; never feed this untrusted input.
-      charVar = eval(charVar)
-
-      if i != 0 and MODE == 'switch':
-        w('case %s: // <%s>' % (charVarNumber(charVar), ','.join([str(x) for x in charVar])))
-        w.indent()
-
-      l = states.items()
-
-      byFromState = {}
-
-      # first pass to assign states
-      byAction = {}
-      for s, (toS, offset) in l:
-        state = str(s)
-
-        toState = str(toS)
-        if state not in stateMap:
-          stateMap[state] = len(stateMap)-1
-        if toState not in stateMap:
-          stateMap[toState] = len(stateMap)-1
-
-        byFromState[stateMap[state]] = (1+stateMap[toState], offset)
-
-        fromStateDesc = s[1:len(s)-1]
-        toStateDesc = ', '.join([str(x) for x in toS])
-
-        tup = (stateMap[toState], toStateDesc, offset)
-        if tup not in byAction:
-          byAction[tup] = []
-        byAction[tup].append((fromStateDesc, stateMap[state]))
-
-      if numCasesPerVector is None:
-        numCasesPerVector = len(l)
-      else:
-        # we require this to be uniform... empirically it seems to be!
-        assert numCasesPerVector == len(l)
-
-      if MODE == 'array':
-
-        for s in range(numCasesPerVector):
-          toState, offsetIncr = byFromState[s]
-          toStateArray.append(toState)
-          toOffsetIncrArray.append(offsetIncr)
-
-      else:
-
-        # render switches
-        w('switch(state) {   // %s cases' % len(l))
-
-        for (toState, toStateDesc, offset), lx in byAction.items():
-          for fromStateDesc, fromState in lx:
-            w('case %s: // %s' % (fromState, fromStateDesc))
-          w.indent()
-          w('  state = %s; // %s' % (toState, toStateDesc))
-          if offset > 0:
-            w('  offset += %s;' % offset)
-          w('break;')
-          w.outdent()
-
-        w('}')
-        if i != 0:
-          w('break;')
-          w.outdent()
-
-    if MODE == 'array':
-      # strangely state can come in wildly out of bounds....
-      w('  if (state < %d) {' % numCasesPerVector)
-      w('    final int loc = vector * %d + state;' % numCasesPerVector)
-      if PACKED:
-        w('    offset += unpack(offsetIncrs%d, loc, NBITSOFFSET%d);' % (i, i))
-        w('    state = unpack(toStates%d, loc, NBITSSTATES%d)-1;' % (i, i))
-      else:
-        w('    offset += offsetIncrs%d[loc];' % i)
-        w('    state = toStates%d[loc]-1;' % i)
-      w('  }')
-    elif i != 0:
-      w('}')
-
-    machines.append((toStateArray, toOffsetIncrArray, numCasesPerVector, numVectors))
-
-  # ends switch statement for machine
-  w('}')
-
-  w('')
-
-  w('  if (state == -1) {')
-  w('    // null state')
-  w('    return -1;')
-  w('  } else {')
-  w('    // translate back to abs')
-  w('    return state*(w+1)+offset;')
-  w('  }')
-
-  # ends transition method
-  w('}')
-
-  # NBITS* placeholders in the emitted text are replaced at the very end
-  subs = []
-  if MODE == 'array':
-    w.indent()
-    for i, (toStateArray, toOffsetIncrsArray, numCasesPerVector, numVectors) in enumerate(machines):
-      w('')
-      w.outdent()
-      w('// %d vectors; %d states per vector; array length = %d' % \
-        (numVectors, numCasesPerVector, numVectors*numCasesPerVector))
-      w.indent()
-      if PACKED:
-        # pack in python
-        l, nbits = pack(toStateArray)
-        subs.append(('NBITSSTATES%d' % i, str(nbits)))
-        w('  private final static long[] toStates%d = new long[] /*%d bits per value */ %s;' % \
-          (i, nbits, renderList([hex(long(x)) for x in l])))
-
-        l, nbits = pack(toOffsetIncrsArray)
-        subs.append(('NBITSOFFSET%d' % i, str(nbits)))
-        w('  private final static long[] offsetIncrs%d = new long[] /*%d bits per value */ %s;' % \
-          (i, nbits, renderList([hex(long(x)) for x in l])))
-      else:
-        w('  private final static int[] toStates%d = new int[] %s;' % \
-          (i, renderList([str(x) for x in toStateArray])))
-        # the offset table must be rendered from the offset increments
-        w('  private final static int[] offsetIncrs%d = new int[] %s;' % \
-          (i, renderList([str(x) for x in toOffsetIncrsArray])))
-    w.outdent()
-
-  stateMap2 = dict([[v,k] for k,v in stateMap.items()])
-  w('')
-  w('// state map')
-  sum = 0
-  minErrors = []
-  for i in xrange(len(stateMap2)-1):
-    w('//   %s -> %s' % (i, stateMap2[i]))
-    # we replace t-notation as its not relevant here
-    st = stateMap2[i].replace('t', '')
-
-    v = eval(st)
-    minError = min([-i+e for i, e in v])
-    c = len(v)
-    sum += c
-    minErrors.append(minError)
-  w('')
-
-  w.indent()
-  #w('private final static int[] minErrors = new int[] {%s};' % ','.join([str(x) for x in minErrors]))
-
-  w.outdent()
-
-  w('')
-  w('  public %s(int w) {' % className)
-  w('    super(w, %d, new int[] {%s});' % (n, ','.join([str(x) for x in minErrors])), indent=1)
-  w('  }')
-
-  if 0:
-    w('')
-    w('@Override')
-    w('public int size() { // this can now move up?')
-    w('  return %d*(w+1);' % (len(stateMap2)-1))
-    w('}')
-
-    w('')
-    w('@Override')
-    w('public int getPosition(int absState) { // this can now move up?')
-    w('  return absState % (w+1);')
-    w('}')
-
-    w('')
-    w('@Override')
-    w('public boolean isAccept(int absState) { // this can now move up?')
-    w('  // decode absState -> state, offset')
-    w('  int state = absState/(w+1);')
-    w('  if (true || state < minErrors.length) {')
-    w('    int offset = absState%(w+1);')
-    w('    assert offset >= 0;')
-    w('    return w - offset + minErrors[state] <= %d;' % n)
-    w('  } else {')
-    w('    return false;')
-    w('  }')
-    w('}')
-
-  if MODE == 'array' and PACKED:
-
-    # we moved into super class
-    if False:
-      w('')
-
-      v = 2
-      l = []
-      for i in range(63):
-        l.append(hex(v-1))
-        v *= 2
-
-      w('private final static long[] MASKS = new long[] {%s};' % ','.join(l), indent=1)
-      w('')
-
-      # unpack in java
-      w('private int unpack(long[] data, int index, int bitsPerValue) {')
-      w('  final long bitLoc = bitsPerValue * index;')
-      w('  final int dataLoc = (int) (bitLoc >> %d);' % LOG2_WORD)
-      w('  final int bitStart = (int) (bitLoc & %d);' % (WORD-1))
-      w('  //System.out.println("index=" + index + " dataLoc=" + dataLoc + " bitStart=" + bitStart + " bitsPerV=" + bitsPerValue);')
-      w('  if (bitStart + bitsPerValue <= %d) {' % WORD)
-      w('    // not split')
-      w('    return (int) ((data[dataLoc] >> bitStart) & MASKS[bitsPerValue-1]);')
-      w('  } else {')
-      w('    // split')
-      w('    final int part = %d-bitStart;' % WORD)
-      w('    return (int) (((data[dataLoc] >> bitStart) & MASKS[part-1]) +')
-      w('      ((data[1+dataLoc] & MASKS[bitsPerValue-part-1]) << part));', indent=1)
-      w('  }')
-      w('}')
-
-  # class
-  w('}')
-  w('')
-
-  fileOut = '%s.java' % className
-
-  s = str(w)
-  for sub, repl in subs:
-    s = s.replace(sub, repl)
-
-  # close explicitly so that getsize below sees the complete file
-  out = open(fileOut, 'wb')
-  try:
-    out.write(s)
-  finally:
-    out.close()
-
-  print 'Wrote %s [%d lines; %.1f KB]' % \
-        (fileOut, len(w.l), os.path.getsize(fileOut)/1024.)
-
-def renderList(l):
-  """
-  Renders a list of strings as a Java array initializer with four comma
-  separated entries per line.
-  """
-  rows = [','.join(l[first:first+4]) for first in range(0, len(l), 4)]
-  # every row but the last ends with a comma and is followed by a new row
-  body = ',\n    '.join(rows)
-  return '{\n    %s\n  }' % body
-
-# MASKS[i] has the low i+1 bits set (1, 3, 7, ...); used by pack and unpack.
-MASKS = []
-v = 2
-for i in xrange(63):
-  MASKS.append(v-1)
-  v *= 2
-
-# packs into longs; returns long[], numBits
-def pack(l):
-  """
-  Packs the non-negative ints in l into WORD-bit words, lowest bits first,
-  using as few bits per value as the largest entry needs (at least 1).
-  Values may straddle two words.
-
-  Returns (packedWords, bitsPerValue).
-  """
-  bitsPerValue = max(1, int(math.ceil(math.log(max(l)+1)/math.log(2.0))))
-
-  words = []
-  current = 0
-  bitsLeft = WORD
-
-  for value in l:
-    bitsFilled = WORD-bitsLeft
-    if current > 0:
-      # sanity check: the pending word must not exceed the bits handed out
-      bitsUsed = math.ceil(math.log(current)/math.log(2.0))
-      assert bitsUsed <= bitsFilled, 'bitsLeft=%s (%s-%s=%s) bitsUsed=%s' % (bitsLeft, WORD, bitsLeft, WORD-bitsLeft, bitsUsed)
-
-    if bitsLeft < bitsPerValue:
-      # split: bottom bitsLeft bits go in current word, the rest starts the
-      # next one
-      current += (value & MASKS[bitsLeft-1]) << bitsFilled
-      words.append(current)
-      current = value >> bitsLeft
-      bitsLeft = WORD - (bitsPerValue-bitsLeft)
-      continue
-
-    current += value << bitsFilled
-    bitsLeft -= bitsPerValue
-    if bitsLeft == 0:
-      # word is exactly full
-      words.append(current)
-      current = 0
-      bitsLeft = WORD
-
-  if bitsLeft < WORD:
-    # flush the partially filled last word
-    words.append(current)
-
-  # verify(l, words, bitsPerValue)
-
-  return words, bitsPerValue
-
-def verify(data, packedData, bitsPerValue):
-  """
-  Asserts that every entry of data can be read back from packedData.
-  """
-  for index, expected in enumerate(data):
-    assert expected == unpack(packedData, index, bitsPerValue)
-
-def unpack(data, index, bitsPerValue):
-  """
-  Reads the index'th bitsPerValue-wide value from the packed words in data.
-  """
-  bitLoc = bitsPerValue * index
-  wordIndex = int(bitLoc >> LOG2_WORD)
-  shift = int(bitLoc & (WORD-1))
-  low = data[wordIndex] >> shift
-
-  if shift + bitsPerValue <= WORD:
-    # not split
-    return int(low & MASKS[bitsPerValue-1])
-
-  # split: the low bits come from this word, the rest from the next one
-  lowBits = WORD-shift
-  high = data[1+wordIndex] & MASKS[bitsPerValue-lowBits-1]
-  return int((low & MASKS[lowBits-1]) + (high << lowBits))
-  
-# Refuse to run under python -O, which strips assert statements
-# (presumably because the generator relies on its asserts as sanity checks).
-if __name__ == '__main__':
-  if not __debug__:
-    print
-    print 'ERROR: please run without -O'
-    print
-    sys.exit(1)
-  main()
--- a/fine-lucene/src/main/java/com/fr/third/org/apache/lucene/util/packed/gen_BulkOperation.py
+++ b/fine-lucene/src/main/java/com/fr/third/org/apache/lucene/util/packed/gen_BulkOperation.py
@ -1,335 +0,0 @@
-#! /usr/bin/env python
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from fractions import gcd
-
-"""Code generation for bulk operations"""
-
-# NOTE(review): the code using these constants is not visible here;
-# presumably they bound the specialized classes that get generated and name
-# the generated Java file -- confirm against the rest of the script.
-MAX_SPECIALIZED_BITS_PER_VALUE = 24;
-PACKED_64_SINGLE_BLOCK_BPV = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 16, 21, 32]
-OUTPUT_FILE = "BulkOperation.java"
-HEADER = """// This file has been automatically generated, DO NOT EDIT
-
-package com.fr.third.org.apache.lucene.util.packed;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-"""
-
-FOOTER="""
-  protected int writeLong(long block, byte[] blocks, int blocksOffset) {
-    for (int j = 1; j <= 8; ++j) {
-      blocks[blocksOffset++] = (byte) (block >>> (64 - (j << 3)));
-    }
-    return blocksOffset;
-  }
-
-  /**
-   * For every number of bits per value, there is a minimum number of
-   * blocks (b) / values (v) you need to write in order to reach the next block
-   * boundary:
-   *  - 16 bits per value -> b=1, v=4
-   *  - 24 bits per value -> b=3, v=8
-   *  - 50 bits per value -> b=25, v=32
-   *  - 63 bits per value -> b=63, v=64
-   *  - ...
-   *
-   * A bulk read consists in copying <code>iterations*v</code> values that are
-   * contained in <code>iterations*b</code> blocks into a <code>long[]</code>
-   * (higher values of <code>iterations</code> are likely to yield a better
-   * throughput) => this requires n * (b + v) longs in memory.
-   *
-   * This method computes <code>iterations</code> as
-   * <code>ramBudget / (8 * (b + v))</code> (since a long is 8 bytes).
-   */
-  public final int computeIterations(int valueCount, int ramBudget) {
-    final int iterations = (ramBudget >>> 3) / (blockCount() + valueCount());
-    if (iterations == 0) {
-      // at least 1
-      return 1;
-    } else if ((iterations - 1) * blockCount() >= valueCount) {
-      // don't allocate for more than the size of the reader
-      return (int) Math.ceil((double) valueCount / valueCount());
-    } else {
-      return iterations;
-    }
-  }
-}
-"""
-
-def is_power_of_two(n):
-  """Return True if n is a positive power of two (1, 2, 4, 8, ...)."""
-  # n & (n - 1) clears the lowest set bit; zero has none and is excluded
-  return n > 0 and (n & (n - 1)) == 0
-
-def casts(typ):
-  """Return the (prefix, suffix) of a Java cast to typ; empty for long."""
-  if typ == "long":
-    # values already are longs, no cast required
-    return "", ""
-  return "(%s) (" % typ, ")"
-
-def hexNoLSuffix(n):
-  """Return hex(n) without the trailing 'L' Python 2 appends to longs."""
-  # On 32 bit Python values > (1 << 31)-1 will have L appended by hex function:
-  return hex(n).rstrip('L')
-
-def masks(bits):
-  """
-  Return the (prefix, suffix) that wraps a Java expression in a mask keeping
-  its low bits bits; both are empty for 64 bits, where nothing is masked.
-  """
-  if bits != 64:
-    allOnes = (1 << bits) - 1
-    return "(", " & %sL)" % hexNoLSuffix(allOnes)
-  return "", ""
-
-def get_type(bits):
-  """Return the Java primitive type that is exactly bits wide."""
-  javaType = {8: "byte", 16: "short", 32: "int", 64: "long"}.get(bits)
-  # any other width is a programming error
-  assert javaType is not None
-  return javaType
-
-def block_value_count(bpv, bits=64):
-  """
-  Return (blocks, values): the smallest number of bits-wide blocks, found by
-  halving both counts while they are even, that holds a whole number of
-  bpv-bit values, eg 24 bits per value -> (3, 8).
-  """
-  blocks = bpv
-  values = blocks * bits // bpv
-  # floor division keeps both counts ints on every Python version
-  while blocks % 2 == 0 and values % 2 == 0:
-    blocks //= 2
-    values //= 2
-  assert values * bpv == bits * blocks, "%d values, %d blocks, %d bits per value" %(values, blocks, bpv)
-  return (blocks, values)
-
-def packed64(bpv, f):
-  """
-  Writes the constructor and the decode methods of the BulkOperationPacked
-  class for bpv bits per value to the file-like object f.
-  """
-  blocks, values = block_value_count(bpv)
-
-  # constructor, asserting the block/value counts computed above
-  for line in ("\n",
-               "  public BulkOperationPacked%d() {\n" %bpv,
-               "    super(%d);\n" %bpv,
-               "    assert blockCount() == %d;\n" %blocks,
-               "    assert valueCount() == %d;\n" %values,
-               "  }\n\n"):
-    f.write(line)
-
-  if bpv != 64:
-    p64_decode(bpv, f, 32)
-    p64_decode(bpv, f, 64)
-    return
-
-  # 64 bits per value: blocks are the values, so decoding is a plain copy
-  f.write("""    @Override
-    public void decode(long[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
-      System.arraycopy(blocks, blocksOffset, values, valuesOffset, valueCount() * iterations);
-    }
-
-    @Override
-    public void decode(long[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
-      throw new UnsupportedOperationException();
-    }
-
-    @Override
-    public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
-      throw new UnsupportedOperationException();
-    }
-
-    @Override
-    public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
-      LongBuffer.wrap(values, valuesOffset, iterations * valueCount()).put(ByteBuffer.wrap(blocks, blocksOffset, 8 * iterations * blockCount()).asLongBuffer());
-    }
-""")
-
-def p64_decode(bpv, f, bits):
-  """Write two Java decode() overrides for bpv-bit values to the open file f.
-
-  bits selects the element type of the destination array through get_type().
-  The first generated method reads its input from a long[] of blocks, the
-  second from a byte[].  When bits < bpv both generated methods only throw
-  UnsupportedOperationException.
-  """
-  _, values = block_value_count(bpv)
-  typ = get_type(bits)
-  cast_start, cast_end = casts(typ)
-  # Computed up front: both the long[] and the byte[] variant use the mask.
-  mask = (1 << bpv) - 1
-
-  f.write("  @Override\n")
-  f.write("  public void decode(long[] blocks, int blocksOffset, %s[] values, int valuesOffset, int iterations) {\n" %typ)
-  if bits < bpv:
-    f.write("    throw new UnsupportedOperationException();\n")
-  else:
-    f.write("    for (int i = 0; i < iterations; ++i) {\n")
-
-    if is_power_of_two(bpv):
-      # Values never straddle a block, so a shift loop in the generated code
-      # is enough.
-      f.write("      final long block = blocks[blocksOffset++];\n")
-      f.write("      for (int shift = %d; shift >= 0; shift -= %d) {\n" %(64 - bpv, bpv))
-      f.write("        values[valuesOffset++] = %s(block >>> shift) & %d%s;\n" %(cast_start, mask, cast_end))
-      f.write("      }\n")
-    else:
-      # Unrolled: one generated statement per value of the iteration.
-      for i in xrange(0, values):
-        block_offset = i * bpv // 64
-        bit_offset = (i * bpv) % 64
-        if bit_offset == 0:
-          # start of block
-          f.write("      final long block%d = blocks[blocksOffset++];\n" %block_offset);
-          f.write("      values[valuesOffset++] = %sblock%d >>> %d%s;\n" %(cast_start, block_offset, 64 - bpv, cast_end))
-        elif bit_offset + bpv == 64:
-          # end of block
-          f.write("      values[valuesOffset++] = %sblock%d & %dL%s;\n" %(cast_start, block_offset, mask, cast_end))
-        elif bit_offset + bpv < 64:
-          # middle of block
-          f.write("      values[valuesOffset++] = %s(block%d >>> %d) & %dL%s;\n" %(cast_start, block_offset, 64 - bit_offset - bpv, mask, cast_end))
-        else:
-          # value spans across 2 blocks
-          mask1 = (1 << (64 - bit_offset)) -1
-          shift1 = bit_offset + bpv - 64
-          shift2 = 64 - shift1
-          f.write("      final long block%d = blocks[blocksOffset++];\n" %(block_offset + 1));
-          f.write("      values[valuesOffset++] = %s((block%d & %dL) << %d) | (block%d >>> %d)%s;\n" %(cast_start, block_offset, mask1, shift1, block_offset + 1, shift2, cast_end))
-    f.write("    }\n")
-  f.write("  }\n\n")
-
-  _, byte_values = block_value_count(bpv, 8)
-
-  f.write("  @Override\n")
-  f.write("  public void decode(byte[] blocks, int blocksOffset, %s[] values, int valuesOffset, int iterations) {\n" %typ)
-  if bits < bpv:
-    f.write("    throw new UnsupportedOperationException();\n")
-  else:
-
-    if is_power_of_two(bpv) and bpv < 8:
-      # Several values per byte: unroll the shifts for one byte.
-      f.write("    for (int j = 0; j < 8 * iterations; ++j) {\n")
-      f.write("      final byte block = blocks[blocksOffset++];\n")
-      for shift in xrange(8 - bpv, 0, -bpv):
-        f.write("      values[valuesOffset++] = (block >>> %d) & %d;\n" %(shift, mask))
-      f.write("      values[valuesOffset++] = block & %d;\n" %mask)
-      f.write("    }\n")
-    elif bpv == 8:
-      f.write("    for (int j = 0; j < 8 * iterations; ++j) {\n")
-      f.write("      values[valuesOffset++] = blocks[blocksOffset++] & 0xFF;\n")
-      f.write("    }\n")
-    elif is_power_of_two(bpv) and bpv > 8:
-      # One value is a whole number of bytes, most significant byte first.
-      f.write("    for (int j = 0; j < %d * iterations; ++j) {\n" %(64 // bpv))
-      m = "0xFF" if bits <= 32 else "0xFFL"
-      f.write("      values[valuesOffset++] =")
-      for byte_index in xrange(bpv // 8 - 1):
-        # Each leading byte gets its own shift (bpv - 8, bpv - 16, ...).  The
-        # previous code shifted every byte by bpv - 8, which is only correct
-        # when there is a single leading byte (bpv == 16).
-        f.write(" ((blocks[blocksOffset++] & %s) << %d) |" %(m, bpv - 8 * (byte_index + 1)))
-      f.write(" (blocks[blocksOffset++] & %s);\n" %m)
-      f.write("    }\n")
-    else:
-      # General case: values straddle byte boundaries, so unroll byte_values
-      # values and name every input byte that is read.
-      f.write("    for (int i = 0; i < 8 * iterations; ++i) {\n")
-      for i in xrange(0, byte_values):
-        byte_start = i * bpv // 8
-        bit_start = (i * bpv) % 8
-        byte_end = ((i + 1) * bpv - 1) // 8
-        bit_end = ((i + 1) * bpv - 1) % 8
-        # Left shift that aligns byte b within the value being assembled.
-        shift = lambda b: 8 * (byte_end - b - 1) + 1 + bit_end
-        if bit_start == 0:
-          f.write("      final %s byte%d = blocks[blocksOffset++] & 0xFF;\n" %(typ, byte_start))
-        for b in xrange(byte_start + 1, byte_end + 1):
-          f.write("      final %s byte%d = blocks[blocksOffset++] & 0xFF;\n" %(typ, b))
-        f.write("      values[valuesOffset++] =")
-        if byte_start == byte_end:
-          # The value lives inside a single byte.
-          if bit_start == 0:
-            if bit_end == 7:
-              f.write(" byte%d" %byte_start)
-            else:
-              f.write(" byte%d >>> %d" %(byte_start, 7 - bit_end))
-          else:
-            if bit_end == 7:
-              f.write(" byte%d & %d" %(byte_start, 2 ** (8 - bit_start) - 1))
-            else:
-              f.write(" (byte%d >>> %d) & %d" %(byte_start, 7 - bit_end, 2 ** (bit_end - bit_start + 1) - 1))
-        else:
-          # The value spans several bytes: first byte, middle bytes, last byte.
-          if bit_start == 0:
-            f.write(" (byte%d << %d)" %(byte_start, shift(byte_start)))
-          else:
-            f.write(" ((byte%d & %d) << %d)" %(byte_start, 2 ** (8 - bit_start) - 1, shift(byte_start)))
-          for b in xrange(byte_start + 1, byte_end):
-            f.write(" | (byte%d << %d)" %(b, shift(b)))
-          if bit_end == 7:
-            f.write(" | byte%d" %byte_end)
-          else:
-            f.write(" | (byte%d >>> %d)" %(byte_end, 7 - bit_end))
-        f.write(";\n")
-      f.write("    }\n")
-  f.write("  }\n\n")
-
-if __name__ == '__main__':
-  # Writes OUTPUT_FILE (the BulkOperation class) and one
-  # BulkOperationPacked<bpv>.java file per specialized width.  Both kinds of
-  # file are opened with "with" so they are closed even if generation fails.
-  class_javadoc = '/**\n * Efficient sequential read/write of packed integers.\n */\n'
-
-  with open(OUTPUT_FILE, 'w') as f:
-    f.write(HEADER)
-    f.write('\n')
-    f.write(class_javadoc)
-
-    f.write('abstract class BulkOperation implements PackedInts.Decoder, PackedInts.Encoder {\n')
-    f.write('  private static final BulkOperation[] packedBulkOps = new BulkOperation[] {\n')
-
-    for bpv in xrange(1, 65):
-      if bpv > MAX_SPECIALIZED_BITS_PER_VALUE:
-        # No dedicated class above the limit: use the generic implementation.
-        f.write('    new BulkOperationPacked(%d),\n' % bpv)
-        continue
-      with open('BulkOperationPacked%d.java' % bpv, 'w') as f2:
-        f2.write(HEADER)
-        if bpv == 64:
-          f2.write('import java.nio.LongBuffer;\n')
-          f2.write('import java.nio.ByteBuffer;\n')
-          f2.write('\n')
-        f2.write(class_javadoc)
-        f2.write('final class BulkOperationPacked%d extends BulkOperationPacked {\n' % bpv)
-        packed64(bpv, f2)
-        f2.write('}\n')
-      f.write('    new BulkOperationPacked%d(),\n' % bpv)
-
-    f.write('  };\n')
-    f.write('\n')
-
-    f.write('  // NOTE: this is sparse (some entries are null):\n')
-    f.write('  private static final BulkOperation[] packedSingleBlockBulkOps = new BulkOperation[] {\n')
-    for bpv in xrange(1, max(PACKED_64_SINGLE_BLOCK_BPV)+1):
-      if bpv in PACKED_64_SINGLE_BLOCK_BPV:
-        f.write('    new BulkOperationPackedSingleBlock(%d),\n' % bpv)
-      else:
-        f.write('    null,\n')
-    f.write('  };\n')
-    f.write('\n')
-
-    # Factory method that indexes the two tables by bitsPerValue - 1.
-    f.write("\n")
-    f.write("  public static BulkOperation of(PackedInts.Format format, int bitsPerValue) {\n")
-    f.write("    switch (format) {\n")
-
-    f.write("    case PACKED:\n")
-    f.write("      assert packedBulkOps[bitsPerValue - 1] != null;\n")
-    f.write("      return packedBulkOps[bitsPerValue - 1];\n")
-    f.write("    case PACKED_SINGLE_BLOCK:\n")
-    f.write("      assert packedSingleBlockBulkOps[bitsPerValue - 1] != null;\n")
-    f.write("      return packedSingleBlockBulkOps[bitsPerValue - 1];\n")
-    f.write("    default:\n")
-    f.write("      throw new AssertionError();\n")
-    f.write("    }\n")
-    f.write("  }\n")
-    f.write(FOOTER)
--- a/fine-lucene/src/main/java/com/fr/third/org/apache/lucene/util/packed/gen_Direct.py
+++ b/fine-lucene/src/main/java/com/fr/third/org/apache/lucene/util/packed/gen_Direct.py
@ -1,175 +0,0 @@
-#! /usr/bin/env python
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Text written verbatim at the top of every generated Direct<bpv>.java file:
-# the "generated" marker, package declaration, license and imports.
-HEADER="""// This file has been automatically generated, DO NOT EDIT
-
-package com.fr.third.org.apache.lucene.util.packed;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import com.fr.third.org.apache.lucene.store.DataInput;
-import com.fr.third.org.apache.lucene.util.RamUsageEstimator;
-
-import java.io.IOException;
-import java.util.Arrays;
-
-"""
-
-# Bits per value -> Java primitive type of the generated backing array.
-TYPES = {8: "byte", 16: "short", 32: "int", 64: "long"}
-# Bits per value -> mask expression appended when a stored element is read
-# back as a long (empty for 64 bits).
-MASKS = {8: " & 0xFFL", 16: " & 0xFFFFL", 32: " & 0xFFFFFFFFL", 64: ""}
-# Bits per value -> cast prefix used when a long is stored into the backing
-# array (empty for 64 bits).
-CASTS = {8: "(byte) ", 16: "(short) ", 32: "(int) ", 64: ""}
-
-if __name__ == '__main__':
-  # Generates one Direct<bpv>.java file per entry of TYPES.  Widths are visited
-  # in sorted order so runs are deterministic, and each file is opened with
-  # "with" so it is closed even if generation fails part-way.
-  for bpv in sorted(TYPES):
-    type_name = TYPES[bpv]
-    mask = MASKS[bpv]
-    cast = CASTS[bpv]
-    # Name of the DataInput read method used by the generated constructor.
-    reader = "read%s" % type_name.title()
-    # Number of elements that make up 64 bits of input.
-    per_long = 64 // bpv
-
-    with open("Direct%d.java" %bpv, 'w') as f:
-      f.write(HEADER)
-      f.write("/**\n * Direct wrapping of %d-bits values to a backing array.\n * @lucene.internal\n */\n" %bpv)
-      f.write("final class Direct%d extends PackedInts.MutableImpl {\n" %bpv)
-      f.write("  final %s[] values;\n\n" %type_name)
-
-      f.write("  Direct%d(int valueCount) {\n" %bpv)
-      f.write("    super(valueCount, %d);\n" %bpv)
-      f.write("    values = new %s[valueCount];\n" %type_name)
-      f.write("  }\n\n")
-
-      f.write("  Direct%d(DataInput in, int valueCount) throws IOException {\n" %bpv)
-      f.write("    this(valueCount);\n")
-      f.write("    for (int i = 0; i < valueCount; ++i) {\n")
-      f.write("      values[i] = in.%s();\n" %reader)
-      f.write("    }\n")
-      if bpv != 64:
-        # The generated constructor keeps reading until the number of
-        # elements consumed is a multiple of per_long.
-        f.write("    final int mod = valueCount %% %d;\n" %per_long)
-        f.write("    if (mod != 0) {\n")
-        f.write("      for (int i = mod; i < %d; ++i) {\n" %per_long)
-        f.write("        in.%s();\n" %reader)
-        f.write("      }\n")
-        f.write("    }\n")
-      f.write("  }\n")
-
-      f.write("""
-  @Override
-  public long get(final int index) {
-    return values[index]%s;
-  }
-
-  public void set(final int index, final long value) {
-    values[index] = %s(value);
-  }
-
-  public long ramBytesUsed() {
-    return RamUsageEstimator.sizeOf(values);
-  }
-
-  public void clear() {
-    Arrays.fill(values, %s0L);
-  }
-
-  @Override
-  public Object getArray() {
-    return values;
-  }
-
-  @Override
-  public boolean hasArray() {
-    return true;
-  }
-""" %(mask, cast, cast))
-
-      if bpv == 64:
-        # long[] backing array: bulk get/set/fill map directly onto
-        # System.arraycopy and Arrays.fill.
-        f.write("""
-  @Override
-  public int get(int index, long[] arr, int off, int len) {
-    assert len > 0 : "len must be > 0 (got " + len + ")";
-    assert index >= 0 && index < valueCount;
-    assert off + len <= arr.length;
-
-    final int gets = Math.min(valueCount - index, len);
-    System.arraycopy(values, index, arr, off, gets);
-    return gets;
-  }
-
-  public int set(int index, long[] arr, int off, int len) {
-    assert len > 0 : "len must be > 0 (got " + len + ")";
-    assert index >= 0 && index < valueCount;
-    assert off + len <= arr.length;
-
-    final int sets = Math.min(valueCount - index, len);
-    System.arraycopy(arr, off, values, index, sets);
-    return sets;
-  }
-
-  @Override
-  public void fill(int fromIndex, int toIndex, long val) {
-    Arrays.fill(values, fromIndex, toIndex, val);
-  }
-""")
-      else:
-        # Narrower backing arrays: every element is masked on read and cast
-        # on write.
-        f.write("""
-  @Override
-  public int get(int index, long[] arr, int off, int len) {
-    assert len > 0 : "len must be > 0 (got " + len + ")";
-    assert index >= 0 && index < valueCount;
-    assert off + len <= arr.length;
-
-    final int gets = Math.min(valueCount - index, len);
-    for (int i = index, o = off, end = index + gets; i < end; ++i, ++o) {
-      arr[o] = values[i]%s;
-    }
-    return gets;
-  }
-
-  public int set(int index, long[] arr, int off, int len) {
-    assert len > 0 : "len must be > 0 (got " + len + ")";
-    assert index >= 0 && index < valueCount;
-    assert off + len <= arr.length;
-
-    final int sets = Math.min(valueCount - index, len);
-    for (int i = index, o = off, end = index + sets; i < end; ++i, ++o) {
-      values[i] = %sarr[o];
-    }
-    return sets;
-  }
-
-  @Override
-  public void fill(int fromIndex, int toIndex, long val) {
-    assert val == (val%s);
-    Arrays.fill(values, fromIndex, toIndex, %sval);
-  }
-""" %(mask, cast, mask, cast))
-
-      f.write("}\n")
--- a/fine-lucene/src/main/java/com/fr/third/org/apache/lucene/util/packed/gen_Packed64SingleBlock.py
+++ b/fine-lucene/src/main/java/com/fr/third/org/apache/lucene/util/packed/gen_Packed64SingleBlock.py
@ -1,291 +0,0 @@
-#! /usr/bin/env python
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Bit widths for which a Packed64SingleBlock<N> subclass is generated.
-# Keep this list sorted in ascending order: HEADER takes its last element as
-# MAX_SUPPORTED_BITS_PER_VALUE and the generated isSupported() runs
-# Arrays.binarySearch over the same values.
-SUPPORTED_BITS_PER_VALUE = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 16, 21, 32]
-
-# Java source of the abstract Packed64SingleBlock class up to (but excluding)
-# the create(int, int) factory and the nested subclasses.  The text is a
-# %-format template filled in below with the maximum supported width and the
-# comma-separated list of widths, which is why every literal percent sign in
-# the Java code is written as "%%".
-HEADER="""// This file has been automatically generated, DO NOT EDIT
-
-package com.fr.third.org.apache.lucene.util.packed;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with this
- * work for additional information regarding copyright ownership. The ASF
- * licenses this file to You under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-import java.io.IOException;
-import java.util.Arrays;
-
-import com.fr.third.org.apache.lucene.store.DataInput;
-import com.fr.third.org.apache.lucene.util.RamUsageEstimator;
-
-/**
- * This class is similar to {@link Packed64} except that it trades space for
- * speed by ensuring that a single block needs to be read/written in order to
- * read/write a value.
- */
-abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
-
-  public static final int MAX_SUPPORTED_BITS_PER_VALUE = %d;
-  private static final int[] SUPPORTED_BITS_PER_VALUE = new int[] {%s};
-
-  public static boolean isSupported(int bitsPerValue) {
-    return Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) >= 0;
-  }
-
-  private static int requiredCapacity(int valueCount, int valuesPerBlock) {
-    return valueCount / valuesPerBlock
-        + (valueCount %% valuesPerBlock == 0 ? 0 : 1);
-  }
-
-  final long[] blocks;
-
-  Packed64SingleBlock(int valueCount, int bitsPerValue) {
-    super(valueCount, bitsPerValue);
-    assert isSupported(bitsPerValue);
-    final int valuesPerBlock = 64 / bitsPerValue;
-    blocks = new long[requiredCapacity(valueCount, valuesPerBlock)];
-  }
-
-  @Override
-  public void clear() {
-    Arrays.fill(blocks, 0L);
-  }
-
-  public long ramBytesUsed() {
-    return RamUsageEstimator.sizeOf(blocks);
-  }
-
-  @Override
-  public int get(int index, long[] arr, int off, int len) {
-    assert len > 0 : "len must be > 0 (got " + len + ")";
-    assert index >= 0 && index < valueCount;
-    len = Math.min(len, valueCount - index);
-    assert off + len <= arr.length;
-
-    final int originalIndex = index;
-
-    // go to the next block boundary
-    final int valuesPerBlock = 64 / bitsPerValue;
-    final int offsetInBlock = index %% valuesPerBlock;
-    if (offsetInBlock != 0) {
-      for (int i = offsetInBlock; i < valuesPerBlock && len > 0; ++i) {
-        arr[off++] = get(index++);
-        --len;
-      }
-      if (len == 0) {
-        return index - originalIndex;
-      }
-    }
-
-    // bulk get
-    assert index %% valuesPerBlock == 0;
-    final PackedInts.Decoder decoder = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
-    assert decoder.blockCount() == 1;
-    assert decoder.valueCount() == valuesPerBlock;
-    final int blockIndex = index / valuesPerBlock;
-    final int nblocks = (index + len) / valuesPerBlock - blockIndex;
-    decoder.decode(blocks, blockIndex, arr, off, nblocks);
-    final int diff = nblocks * valuesPerBlock;
-    index += diff; len -= diff;
-
-    if (index > originalIndex) {
-      // stay at the block boundary
-      return index - originalIndex;
-    } else {
-      // no progress so far => already at a block boundary but no full block to
-      // get
-      assert index == originalIndex;
-      return super.get(index, arr, off, len);
-    }
-  }
-
-  @Override
-  public int set(int index, long[] arr, int off, int len) {
-    assert len > 0 : "len must be > 0 (got " + len + ")";
-    assert index >= 0 && index < valueCount;
-    len = Math.min(len, valueCount - index);
-    assert off + len <= arr.length;
-
-    final int originalIndex = index;
-
-    // go to the next block boundary
-    final int valuesPerBlock = 64 / bitsPerValue;
-    final int offsetInBlock = index %% valuesPerBlock;
-    if (offsetInBlock != 0) {
-      for (int i = offsetInBlock; i < valuesPerBlock && len > 0; ++i) {
-        set(index++, arr[off++]);
-        --len;
-      }
-      if (len == 0) {
-        return index - originalIndex;
-      }
-    }
-
-    // bulk set
-    assert index %% valuesPerBlock == 0;
-    final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
-    assert op.blockCount() == 1;
-    assert op.valueCount() == valuesPerBlock;
-    final int blockIndex = index / valuesPerBlock;
-    final int nblocks = (index + len) / valuesPerBlock - blockIndex;
-    op.encode(arr, off, blocks, blockIndex, nblocks);
-    final int diff = nblocks * valuesPerBlock;
-    index += diff; len -= diff;
-
-    if (index > originalIndex) {
-      // stay at the block boundary
-      return index - originalIndex;
-    } else {
-      // no progress so far => already at a block boundary but no full block to
-      // set
-      assert index == originalIndex;
-      return super.set(index, arr, off, len);
-    }
-  }
-
-  @Override
-  public void fill(int fromIndex, int toIndex, long val) {
-    assert fromIndex >= 0;
-    assert fromIndex <= toIndex;
-    assert PackedInts.bitsRequired(val) <= bitsPerValue;
-
-    final int valuesPerBlock = 64 / bitsPerValue;
-    if (toIndex - fromIndex <= valuesPerBlock << 1) {
-      // there needs to be at least one full block to set for the block
-      // approach to be worth trying
-      super.fill(fromIndex, toIndex, val);
-      return;
-    }
-
-    // set values naively until the next block start
-    int fromOffsetInBlock = fromIndex %% valuesPerBlock;
-    if (fromOffsetInBlock != 0) {
-      for (int i = fromOffsetInBlock; i < valuesPerBlock; ++i) {
-        set(fromIndex++, val);
-      }
-      assert fromIndex %% valuesPerBlock == 0;
-    }
-
-    // bulk set of the inner blocks
-    final int fromBlock = fromIndex / valuesPerBlock;
-    final int toBlock = toIndex / valuesPerBlock;
-    assert fromBlock * valuesPerBlock == fromIndex;
-
-    long blockValue = 0L;
-    for (int i = 0; i < valuesPerBlock; ++i) {
-      blockValue = blockValue | (val << (i * bitsPerValue));
-    }
-    Arrays.fill(blocks, fromBlock, toBlock, blockValue);
-
-    // fill the gap
-    for (int i = valuesPerBlock * toBlock; i < toIndex; ++i) {
-      set(i, val);
-    }
-  }
-
-  @Override
-  protected PackedInts.Format getFormat() {
-    return PackedInts.Format.PACKED_SINGLE_BLOCK;
-  }
-
-  @Override
-  public String toString() {
-    return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
-        + ", size=" + size() + ", elements.length=" + blocks.length + ")";
-  }
-
-  public static Packed64SingleBlock create(DataInput in,
-      int valueCount, int bitsPerValue) throws IOException {
-    Packed64SingleBlock reader = create(valueCount, bitsPerValue);
-    for (int i = 0; i < reader.blocks.length; ++i) {
-      reader.blocks[i] = in.readLong();
-    }
-    return reader;
-  }
-
-""" %(SUPPORTED_BITS_PER_VALUE[-1], ", ".join(map(str, SUPPORTED_BITS_PER_VALUE)))
-
-# Closing brace of the Packed64SingleBlock class opened in HEADER.
-FOOTER = "}"
-
-if __name__ == '__main__':
-  # Generates Packed64SingleBlock.java: HEADER, the create(int, int) factory,
-  # one nested subclass per supported width, then FOOTER.  The file is opened
-  # with "with" so it is closed even if generation fails part-way.
-  with open("Packed64SingleBlock.java", 'w') as f:
-    f.write(HEADER)
-    f.write("  public static Packed64SingleBlock create(int valueCount, int bitsPerValue) {\n")
-    f.write("    switch (bitsPerValue) {\n")
-    for bpv in SUPPORTED_BITS_PER_VALUE:
-      f.write("      case %d:\n" %bpv)
-      f.write("        return new Packed64SingleBlock%d(valueCount);\n" %bpv)
-    f.write("      default:\n")
-    # Report the value that was actually requested.  The previous code
-    # formatted the leftover loop variable here, so the generated message
-    # always named the last supported width.
-    f.write("        throw new IllegalArgumentException(\"Unsupported number of bits per value: \" + bitsPerValue);\n")
-    f.write("    }\n")
-    f.write("  }\n\n")
-
-    for bpv in SUPPORTED_BITS_PER_VALUE:
-      # log_2 is the exponent when bpv is a power of two, otherwise None.
-      log_2 = 0
-      while (1 << log_2) < bpv:
-        log_2 += 1
-      if (1 << log_2) != bpv:
-        log_2 = None
-
-      # get() and set() locate a value the same way (block index o, slot b,
-      # bit shift), so the three Java lines are built once.  Power-of-two
-      # widths use shifts and masks, the others division and modulo.
-      if log_2 is not None:
-        locate_lines = [
-          "      final int o = index >>> %d;\n" %(6 - log_2),
-          "      final int b = index & %d;\n" %((1 << (6 - log_2)) - 1),
-          "      final int shift = b << %d;\n" %log_2,
-        ]
-      else:
-        values_per_block = 64 // bpv
-        locate_lines = [
-          "      final int o = index / %d;\n" %values_per_block,
-          "      final int b = index %% %d;\n" %values_per_block,
-          "      final int shift = b * %d;\n" %bpv,
-        ]
-      value_mask = (1 << bpv) - 1
-
-      f.write("  static class Packed64SingleBlock%d extends Packed64SingleBlock {\n\n" %bpv)
-
-      f.write("    Packed64SingleBlock%d(int valueCount) {\n" %bpv)
-      f.write("      super(valueCount, %d);\n" %bpv)
-      f.write("    }\n\n")
-
-      f.write("    @Override\n")
-      f.write("    public long get(int index) {\n")
-      for line in locate_lines:
-        f.write(line)
-      f.write("      return (blocks[o] >>> shift) & %dL;\n" %value_mask)
-      f.write("    }\n\n")
-
-      f.write("    @Override\n")
-      f.write("    public void set(int index, long value) {\n")
-      for line in locate_lines:
-        f.write(line)
-      f.write("      blocks[o] = (blocks[o] & ~(%dL << shift)) | (value << shift);\n" %value_mask)
-      f.write("    }\n\n")
-      f.write("  }\n\n")
-
-    f.write(FOOTER)
--- a/fine-lucene/src/main/java/com/fr/third/org/apache/lucene/util/packed/gen_PackedThreeBlocks.py
+++ b/fine-lucene/src/main/java/com/fr/third/org/apache/lucene/util/packed/gen_PackedThreeBlocks.py
@ -1,161 +0,0 @@
-#! /usr/bin/env python
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Text written verbatim at the top of every generated Packed<bpv>ThreeBlocks.java
-# file: the "generated" marker, package declaration, license and imports.
-HEADER="""// This file has been automatically generated, DO NOT EDIT
-
-package com.fr.third.org.apache.lucene.util.packed;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import com.fr.third.org.apache.lucene.store.DataInput;
-import com.fr.third.org.apache.lucene.util.RamUsageEstimator;
-
-import java.io.IOException;
-import java.util.Arrays;
-
-"""
-
-# Block width in bits -> Java primitive type of the generated blocks array.
-# The generation loop below iterates over the keys of this dict only.
-TYPES = {8: "byte", 16: "short"}
-# Block width -> mask expression appended when a block is read back as part of
-# a long.  The 32 and 64 entries are never looked up by this script, because
-# only the keys of TYPES are used.
-MASKS = {8: " & 0xFFL", 16: " & 0xFFFFL", 32: " & 0xFFFFFFFFL", 64: ""}
-# Block width -> cast prefix used when part of a long is stored into a block.
-# As with MASKS, only the 8 and 16 entries are used.
-CASTS = {8: "(byte) ", 16: "(short) ", 32: "(int) ", 64: ""}
-
-if __name__ == '__main__':
-  # Generates one Packed<bpv>ThreeBlocks.java file per entry of TYPES; each
-  # value is stored as three consecutive blocks of bpv bits.  Widths are
-  # visited in sorted order so runs are deterministic, and each file is opened
-  # with "with" so it is closed even if generation fails part-way.
-  for bpv in sorted(TYPES):
-    type_name = TYPES[bpv]
-    mask = MASKS[bpv]
-    cast = CASTS[bpv]
-    # Name of the DataInput read method used by the generated constructor.
-    reader = "read%s" % type_name.title()
-    # Number of blocks that make up 64 bits of input.
-    per_long = 64 // bpv
-
-    # Positional arguments for the class-body template below, grouped by the
-    # generated method that consumes them.
-    read_args = (mask, 2*bpv, mask, bpv, mask)    # get(int) and bulk get
-    write_args = (cast, 2*bpv, cast, bpv, cast)   # set(int, long) and bulk set
-    fill_args = (type_name, cast, 2*bpv,
-                 type_name, cast, bpv,
-                 type_name, cast)                 # fill
-    clear_args = (cast,)                          # clear
-    body_args = read_args * 2 + write_args * 2 + fill_args + clear_args
-
-    with open("Packed%dThreeBlocks.java" %bpv, 'w') as f:
-      f.write(HEADER)
-      f.write("/**\n * Packs integers into 3 %ss (%d bits per value).\n * @lucene.internal\n */\n" %(type_name, bpv*3))
-      f.write("final class Packed%dThreeBlocks extends PackedInts.MutableImpl {\n" %bpv)
-      f.write("  final %s[] blocks;\n\n" %type_name)
-
-      f.write("  public static final int MAX_SIZE = Integer.MAX_VALUE / 3;\n\n")
-
-      f.write("  Packed%dThreeBlocks(int valueCount) {\n" %bpv)
-      f.write("    super(valueCount, %d);\n" %(bpv*3))
-      f.write("    if (valueCount > MAX_SIZE) {\n")
-      f.write("      throw new ArrayIndexOutOfBoundsException(\"MAX_SIZE exceeded\");\n")
-      f.write("    }\n")
-      f.write("    blocks = new %s[valueCount * 3];\n" %type_name)
-      f.write("  }\n\n")
-
-      f.write("  Packed%dThreeBlocks(DataInput in, int valueCount) throws IOException {\n" %bpv)
-      f.write("    this(valueCount);\n")
-      f.write("    for (int i = 0; i < 3 * valueCount; ++i) {\n")
-      f.write("      blocks[i] = in.%s();\n" %reader)
-      f.write("    }\n")
-      # The generated constructor keeps reading until the number of blocks
-      # consumed is a multiple of per_long.
-      f.write("    final int mod = blocks.length %% %d;\n" %per_long)
-      f.write("    if (mod != 0) {\n")
-      f.write("      for (int i = mod; i < %d; ++i) {\n" %per_long)
-      f.write("         in.%s();\n" %reader)
-      f.write("      }\n")
-      f.write("    }\n")
-      f.write("  }\n")
-
-      f.write("""
-  @Override
-  public long get(int index) {
-    final int o = index * 3;
-    return (blocks[o]%s) << %d | (blocks[o+1]%s) << %d | (blocks[o+2]%s);
-  }
-
-  @Override
-  public int get(int index, long[] arr, int off, int len) {
-    assert len > 0 : "len must be > 0 (got " + len + ")";
-    assert index >= 0 && index < valueCount;
-    assert off + len <= arr.length;
-
-    final int gets = Math.min(valueCount - index, len);
-    for (int i = index * 3, end = (index + gets) * 3; i < end; i+=3) {
-      arr[off++] = (blocks[i]%s) << %d | (blocks[i+1]%s) << %d | (blocks[i+2]%s);
-    }
-    return gets;
-  }
-
-  @Override
-  public void set(int index, long value) {
-    final int o = index * 3;
-    blocks[o] = %s(value >>> %d);
-    blocks[o+1] = %s(value >>> %d);
-    blocks[o+2] = %svalue;
-  }
-
-  @Override
-  public int set(int index, long[] arr, int off, int len) {
-    assert len > 0 : "len must be > 0 (got " + len + ")";
-    assert index >= 0 && index < valueCount;
-    assert off + len <= arr.length;
-
-    final int sets = Math.min(valueCount - index, len);
-    for (int i = off, o = index * 3, end = off + sets; i < end; ++i) {
-      final long value = arr[i];
-      blocks[o++] = %s(value >>> %d);
-      blocks[o++] = %s(value >>> %d);
-      blocks[o++] = %svalue;
-    }
-    return sets;
-  }
-
-  @Override
-  public void fill(int fromIndex, int toIndex, long val) {
-    final %s block1 = %s(val >>> %d);
-    final %s block2 = %s(val >>> %d);
-    final %s block3 = %sval;
-    for (int i = fromIndex * 3, end = toIndex * 3; i < end; i += 3) {
-      blocks[i] = block1;
-      blocks[i+1] = block2;
-      blocks[i+2] = block3;
-    }
-  }
-
-  @Override
-  public void clear() {
-    Arrays.fill(blocks, %s0);
-  }
-
-  public long ramBytesUsed() {
-    return RamUsageEstimator.sizeOf(blocks);
-  }
-
-  @Override
-  public String toString() {
-    return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
-        + ", size=" + size() + ", elements.length=" + blocks.length + ")";
-  }
-}
-""" %body_args)