alex.sung
6 years ago
70 changed files with 18776 additions and 0 deletions
@ -0,0 +1,38 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec; |
||||
|
||||
/** |
||||
* Defines common decoding methods for byte array decoders. |
||||
* |
||||
* @version $Id: BinaryDecoder.java 1379145 2012-08-30 21:02:52Z tn $ |
||||
*/ |
||||
public interface BinaryDecoder extends Decoder { |
||||
|
||||
/** |
||||
* Decodes a byte array and returns the results as a byte array. |
||||
* |
||||
* @param source |
||||
* A byte array which has been encoded with the appropriate encoder |
||||
* @return a byte array that contains decoded content |
||||
* @throws DecoderException |
||||
* A decoder exception is thrown if a Decoder encounters a failure condition during the decode process. |
||||
*/ |
||||
byte[] decode(byte[] source) throws DecoderException; |
||||
} |
||||
|
@ -0,0 +1,38 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec; |
||||
|
||||
/** |
||||
* Defines common encoding methods for byte array encoders. |
||||
* |
||||
* @version $Id: BinaryEncoder.java 1379145 2012-08-30 21:02:52Z tn $ |
||||
*/ |
||||
public interface BinaryEncoder extends Encoder { |
||||
|
||||
/** |
||||
* Encodes a byte array and return the encoded data as a byte array. |
||||
* |
||||
* @param source |
||||
* Data to be encoded |
||||
* @return A byte array containing the encoded data |
||||
* @throws EncoderException |
||||
* thrown if the Encoder encounters a failure condition during the encoding process. |
||||
*/ |
||||
byte[] encode(byte[] source) throws EncoderException; |
||||
} |
||||
|
@ -0,0 +1,113 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec; |
||||
|
||||
/**
 * Character encoding names required of every implementation of the Java platform.
 *
 * From the Java documentation <a
 * href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>:
 * <p>
 * <cite>Every implementation of the Java platform is required to support the following character encodings. Consult the
 * release documentation for your implementation to see if any other encodings are supported.</cite>
 * </p>
 *
 * <ul>
 * <li><code>US-ASCII</code><br>
 * Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the Unicode character set.</li>
 * <li><code>ISO-8859-1</code><br>
 * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</li>
 * <li><code>UTF-8</code><br>
 * Eight-bit Unicode Transformation Format.</li>
 * <li><code>UTF-16BE</code><br>
 * Sixteen-bit Unicode Transformation Format, big-endian byte order.</li>
 * <li><code>UTF-16LE</code><br>
 * Sixteen-bit Unicode Transformation Format, little-endian byte order.</li>
 * <li><code>UTF-16</code><br>
 * Sixteen-bit Unicode Transformation Format, byte order specified by a mandatory initial byte-order mark (either order
 * accepted on input, big-endian used on output.)</li>
 * </ul>
 *
 * This perhaps would best belong in the [lang] project. Even if a similar class is defined in [lang], it is not
 * foreseen that [codec] would be made to depend on [lang].
 *
 * <p>
 * This class is immutable and thread-safe.
 * </p>
 *
 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 * @since 1.4
 * @version $Id: CharEncoding.java 1563226 2014-01-31 19:38:06Z ggregory $
 */
public class CharEncoding {

    /**
     * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final String ISO_8859_1 = "ISO-8859-1";

    /**
     * Seven-bit ASCII, also known as ISO646-US, also known as the Basic Latin block of the Unicode character set.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final String US_ASCII = "US-ASCII";

    /**
     * Sixteen-bit Unicode Transformation Format, the byte order specified by a mandatory initial byte-order mark
     * (either order accepted on input, big-endian used on output).
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final String UTF_16 = "UTF-16";

    /**
     * Sixteen-bit Unicode Transformation Format, big-endian byte order.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final String UTF_16BE = "UTF-16BE";

    /**
     * Sixteen-bit Unicode Transformation Format, little-endian byte order.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final String UTF_16LE = "UTF-16LE";

    /**
     * Eight-bit Unicode Transformation Format.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final String UTF_8 = "UTF-8";
}
@ -0,0 +1,168 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
package com.fr.third.org.apache.commons.codec; |
||||
|
||||
import java.nio.charset.Charset; |
||||
|
||||
/** |
||||
* Charsets required of every implementation of the Java platform. |
||||
* |
||||
* From the Java documentation <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard |
||||
* charsets</a>: |
||||
* <p> |
||||
* <cite>Every implementation of the Java platform is required to support the following character encodings. Consult the |
||||
* release documentation for your implementation to see if any other encodings are supported. Consult the release |
||||
* documentation for your implementation to see if any other encodings are supported. </cite> |
||||
* </p> |
||||
* |
||||
* <ul> |
||||
* <li><code>US-ASCII</code><br> |
||||
* Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the Unicode character set.</li> |
||||
* <li><code>ISO-8859-1</code><br> |
||||
* ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</li> |
||||
* <li><code>UTF-8</code><br> |
||||
* Eight-bit Unicode Transformation Format.</li> |
||||
* <li><code>UTF-16BE</code><br> |
||||
* Sixteen-bit Unicode Transformation Format, big-endian byte order.</li> |
||||
* <li><code>UTF-16LE</code><br> |
||||
* Sixteen-bit Unicode Transformation Format, little-endian byte order.</li> |
||||
* <li><code>UTF-16</code><br> |
||||
* Sixteen-bit Unicode Transformation Format, byte order specified by a mandatory initial byte-order mark (either order |
||||
* accepted on input, big-endian used on output.)</li> |
||||
* </ul> |
||||
* |
||||
* This perhaps would best belong in the Commons Lang project. Even if a similar class is defined in Commons Lang, it is |
||||
* not foreseen that Commons Codec would be made to depend on Commons Lang. |
||||
* |
||||
* <p> |
||||
* This class is immutable and thread-safe. |
||||
* </p> |
||||
* |
||||
* @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> |
||||
* @since 1.7 |
||||
* @version $Id: CharEncoding.java 1173287 2011-09-20 18:16:19Z ggregory $ |
||||
*/ |
||||
public class Charsets { |
||||
|
||||
//
|
||||
// This class should only contain Charset instances for required encodings. This guarantees that it will load
|
||||
// correctly and without delay on all Java platforms.
|
||||
//
|
||||
|
||||
/** |
||||
* Returns the given Charset or the default Charset if the given Charset is null. |
||||
* |
||||
* @param charset |
||||
* A charset or null. |
||||
* @return the given Charset or the default Charset if the given Charset is null |
||||
*/ |
||||
public static Charset toCharset(final Charset charset) { |
||||
return charset == null ? Charset.defaultCharset() : charset; |
||||
} |
||||
|
||||
/** |
||||
* Returns a Charset for the named charset. If the name is null, return the default Charset. |
||||
* |
||||
* @param charset |
||||
* The name of the requested charset, may be null. |
||||
* @return a Charset for the named charset |
||||
* @throws java.nio.charset.UnsupportedCharsetException |
||||
* If the named charset is unavailable |
||||
*/ |
||||
public static Charset toCharset(final String charset) { |
||||
return charset == null ? Charset.defaultCharset() : Charset.forName(charset); |
||||
} |
||||
|
||||
/** |
||||
* CharEncodingISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1. |
||||
* <p> |
||||
* Every implementation of the Java platform is required to support this character encoding. |
||||
* </p> |
||||
* <p> |
||||
* On Java 7 or later, use {@link java.nio.charset.StandardCharsets#ISO_8859_1} instead. |
||||
* </p> |
||||
* |
||||
* @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> |
||||
*/ |
||||
public static final Charset ISO_8859_1 = Charset.forName(CharEncoding.ISO_8859_1); |
||||
|
||||
/** |
||||
* Seven-bit ASCII, also known as ISO646-US, also known as the Basic Latin block of the Unicode character set. |
||||
* <p> |
||||
* Every implementation of the Java platform is required to support this character encoding. |
||||
* </p> |
||||
* <p> |
||||
* On Java 7 or later, use {@link java.nio.charset.StandardCharsets#ISO_8859_1} instead. |
||||
* </p> |
||||
* |
||||
* @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> |
||||
*/ |
||||
public static final Charset US_ASCII = Charset.forName(CharEncoding.US_ASCII); |
||||
|
||||
/** |
||||
* Sixteen-bit Unicode Transformation Format, The byte order specified by a mandatory initial byte-order mark |
||||
* (either order accepted on input, big-endian used on output) |
||||
* <p> |
||||
* Every implementation of the Java platform is required to support this character encoding. |
||||
* </p> |
||||
* <p> |
||||
* On Java 7 or later, use {@link java.nio.charset.StandardCharsets#ISO_8859_1} instead. |
||||
* </p> |
||||
* |
||||
* @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> |
||||
*/ |
||||
public static final Charset UTF_16 = Charset.forName(CharEncoding.UTF_16); |
||||
|
||||
/** |
||||
* Sixteen-bit Unicode Transformation Format, big-endian byte order. |
||||
* <p> |
||||
* Every implementation of the Java platform is required to support this character encoding. |
||||
* </p> |
||||
* <p> |
||||
* On Java 7 or later, use {@link java.nio.charset.StandardCharsets#ISO_8859_1} instead. |
||||
* </p> |
||||
* |
||||
* @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> |
||||
*/ |
||||
public static final Charset UTF_16BE = Charset.forName(CharEncoding.UTF_16BE); |
||||
|
||||
/** |
||||
* Sixteen-bit Unicode Transformation Format, little-endian byte order. |
||||
* <p> |
||||
* Every implementation of the Java platform is required to support this character encoding. |
||||
* </p> |
||||
* <p> |
||||
* On Java 7 or later, use {@link java.nio.charset.StandardCharsets#ISO_8859_1} instead. |
||||
* </p> |
||||
* |
||||
* @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> |
||||
*/ |
||||
public static final Charset UTF_16LE = Charset.forName(CharEncoding.UTF_16LE); |
||||
|
||||
/** |
||||
* Eight-bit Unicode Transformation Format. |
||||
* <p> |
||||
* Every implementation of the Java platform is required to support this character encoding. |
||||
* </p> |
||||
* <p> |
||||
* On Java 7 or later, use {@link java.nio.charset.StandardCharsets#ISO_8859_1} instead. |
||||
* </p> |
||||
* |
||||
* @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> |
||||
*/ |
||||
public static final Charset UTF_8 = Charset.forName(CharEncoding.UTF_8); |
||||
} |
@ -0,0 +1,47 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec; |
||||
|
||||
/** |
||||
* Provides the highest level of abstraction for Decoders. |
||||
* <p> |
||||
* This is the sister interface of {@link Encoder}. All Decoders implement this common generic interface. |
||||
* Allows a user to pass a generic Object to any Decoder implementation in the codec package. |
||||
* <p> |
||||
* One of the two interfaces at the center of the codec package. |
||||
* |
||||
* @version $Id: Decoder.java 1379145 2012-08-30 21:02:52Z tn $ |
||||
*/ |
||||
public interface Decoder { |
||||
|
||||
/** |
||||
* Decodes an "encoded" Object and returns a "decoded" Object. Note that the implementation of this interface will |
||||
* try to cast the Object parameter to the specific type expected by a particular Decoder implementation. If a |
||||
* {@link ClassCastException} occurs this decode method will throw a DecoderException. |
||||
* |
||||
* @param source |
||||
* the object to decode |
||||
* @return a 'decoded" object |
||||
* @throws DecoderException |
||||
* a decoder exception can be thrown for any number of reasons. Some good candidates are that the |
||||
* parameter passed to this method is null, a param cannot be cast to the appropriate type for a |
||||
* specific encoder. |
||||
*/ |
||||
Object decode(Object source) throws DecoderException; |
||||
} |
||||
|
@ -0,0 +1,86 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec; |
||||
|
||||
/**
 * Thrown when there is a failure condition during the decoding process. This exception is thrown when a {@link Decoder}
 * encounters a decoding specific exception such as invalid data, or characters outside of the expected range.
 *
 * @version $Id: DecoderException.java 1619948 2014-08-22 22:53:55Z ggregory $
 */
public class DecoderException extends Exception {

    /**
     * Declares the Serial Version Uid.
     *
     * @see <a href="http://c2.com/cgi/wiki?AlwaysDeclareSerialVersionUid">Always Declare Serial Version Uid</a>
     */
    private static final long serialVersionUID = 1L;

    /**
     * Constructs a new exception with <code>null</code> as its detail message. The cause is not initialized, and may
     * subsequently be initialized by a call to {@link #initCause}.
     *
     * @since 1.4
     */
    public DecoderException() {
        super();
    }

    /**
     * Constructs a new exception with the specified detail message. The cause is not initialized, and may subsequently
     * be initialized by a call to {@link #initCause}.
     *
     * @param message
     *            The detail message which is saved for later retrieval by the {@link #getMessage()} method.
     */
    public DecoderException(final String message) {
        super(message);
    }

    /**
     * Constructs a new exception with the specified detail message and cause.
     * <p>
     * Note that the detail message associated with <code>cause</code> is not automatically incorporated into this
     * exception's detail message.
     * </p>
     *
     * @param message
     *            The detail message which is saved for later retrieval by the {@link #getMessage()} method.
     * @param cause
     *            The cause which is saved for later retrieval by the {@link #getCause()} method. A <code>null</code>
     *            value is permitted, and indicates that the cause is nonexistent or unknown.
     * @since 1.4
     */
    public DecoderException(final String message, final Throwable cause) {
        super(message, cause);
    }

    /**
     * Constructs a new exception with the specified cause and a detail message of <code>(cause==null ?
     * null : cause.toString())</code> (which typically contains the class and detail message of <code>cause</code>).
     * This constructor is useful for exceptions that are little more than wrappers for other throwables.
     *
     * @param cause
     *            The cause which is saved for later retrieval by the {@link #getCause()} method. A <code>null</code>
     *            value is permitted, and indicates that the cause is nonexistent or unknown.
     * @since 1.4
     */
    public DecoderException(final Throwable cause) {
        super(cause);
    }
}
@ -0,0 +1,44 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec; |
||||
|
||||
/** |
||||
* Provides the highest level of abstraction for Encoders. |
||||
* <p> |
||||
* This is the sister interface of {@link Decoder}. Every implementation of Encoder provides this |
||||
* common generic interface which allows a user to pass a generic Object to any Encoder implementation |
||||
* in the codec package. |
||||
* |
||||
* @version $Id: Encoder.java 1379145 2012-08-30 21:02:52Z tn $ |
||||
*/ |
||||
public interface Encoder { |
||||
|
||||
/** |
||||
* Encodes an "Object" and returns the encoded content as an Object. The Objects here may just be |
||||
* <code>byte[]</code> or <code>String</code>s depending on the implementation used. |
||||
* |
||||
* @param source |
||||
* An object to encode |
||||
* @return An "encoded" Object |
||||
* @throws EncoderException |
||||
* An encoder exception is thrown if the encoder experiences a failure condition during the encoding |
||||
* process. |
||||
*/ |
||||
Object encode(Object source) throws EncoderException; |
||||
} |
||||
|
@ -0,0 +1,89 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec; |
||||
|
||||
/**
 * Thrown when there is a failure condition during the encoding process. This exception is thrown when an
 * {@link Encoder} encounters an encoding-specific exception such as invalid data, inability to calculate a checksum,
 * or characters outside of the expected range.
 *
 * @version $Id: EncoderException.java 1619948 2014-08-22 22:53:55Z ggregory $
 */
public class EncoderException extends Exception {

    /**
     * Declares the Serial Version Uid.
     *
     * @see <a href="http://c2.com/cgi/wiki?AlwaysDeclareSerialVersionUid">Always Declare Serial Version Uid</a>
     */
    private static final long serialVersionUID = 1L;

    /**
     * Constructs a new exception with <code>null</code> as its detail message. The cause is not initialized, and may
     * subsequently be initialized by a call to {@link #initCause}.
     *
     * @since 1.4
     */
    public EncoderException() {
        super();
    }

    /**
     * Constructs a new exception with the specified detail message. The cause is not initialized, and may subsequently
     * be initialized by a call to {@link #initCause}.
     *
     * @param message
     *            a useful message relating to the encoder specific error.
     */
    public EncoderException(final String message) {
        super(message);
    }

    /**
     * Constructs a new exception with the specified detail message and cause.
     *
     * <p>
     * Note that the detail message associated with <code>cause</code> is not automatically incorporated into this
     * exception's detail message.
     * </p>
     *
     * @param message
     *            The detail message which is saved for later retrieval by the {@link #getMessage()} method.
     * @param cause
     *            The cause which is saved for later retrieval by the {@link #getCause()} method. A <code>null</code>
     *            value is permitted, and indicates that the cause is nonexistent or unknown.
     * @since 1.4
     */
    public EncoderException(final String message, final Throwable cause) {
        super(message, cause);
    }

    /**
     * Constructs a new exception with the specified cause and a detail message of <code>(cause==null ?
     * null : cause.toString())</code> (which typically contains the class and detail message of <code>cause</code>).
     * This constructor is useful for exceptions that are little more than wrappers for other throwables.
     *
     * @param cause
     *            The cause which is saved for later retrieval by the {@link #getCause()} method. A <code>null</code>
     *            value is permitted, and indicates that the cause is nonexistent or unknown.
     * @since 1.4
     */
    public EncoderException(final Throwable cause) {
        super(cause);
    }
}
@ -0,0 +1,38 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec; |
||||
|
||||
/** |
||||
* Defines common decoding methods for String decoders. |
||||
* |
||||
* @version $Id: StringDecoder.java 1379145 2012-08-30 21:02:52Z tn $ |
||||
*/ |
||||
public interface StringDecoder extends Decoder { |
||||
|
||||
/** |
||||
* Decodes a String and returns a String. |
||||
* |
||||
* @param source |
||||
* the String to decode |
||||
* @return the encoded String |
||||
* @throws DecoderException |
||||
* thrown if there is an error condition during the Encoding process. |
||||
*/ |
||||
String decode(String source) throws DecoderException; |
||||
} |
||||
|
@ -0,0 +1,38 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec; |
||||
|
||||
/** |
||||
* Defines common encoding methods for String encoders. |
||||
* |
||||
* @version $Id: StringEncoder.java 1379145 2012-08-30 21:02:52Z tn $ |
||||
*/ |
||||
public interface StringEncoder extends Encoder { |
||||
|
||||
/** |
||||
* Encodes a String and returns a String. |
||||
* |
||||
* @param source |
||||
* the String to encode |
||||
* @return the encoded String |
||||
* @throws EncoderException |
||||
* thrown if there is an error condition during the encoding process. |
||||
*/ |
||||
String encode(String source) throws EncoderException; |
||||
} |
||||
|
@ -0,0 +1,91 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec; |
||||
|
||||
import java.util.Comparator; |
||||
|
||||
/** |
||||
* Compares Strings using a {@link StringEncoder}. This comparator is used to sort Strings by an encoding scheme such as |
||||
* Soundex, Metaphone, etc. This class can come in handy if one need to sort Strings by an encoded form of a name such |
||||
* as Soundex. |
||||
* |
||||
* <p>This class is immutable and thread-safe.</p> |
||||
* |
||||
* @version $Id: StringEncoderComparator.java 1468177 2013-04-15 18:35:15Z ggregory $ |
||||
*/ |
||||
@SuppressWarnings("rawtypes") |
||||
// TODO ought to implement Comparator<String> but that's not possible whilst maintaining binary compatibility.
|
||||
public class StringEncoderComparator implements Comparator { |
||||
|
||||
/** |
||||
* Internal encoder instance. |
||||
*/ |
||||
private final StringEncoder stringEncoder; |
||||
|
||||
/** |
||||
* Constructs a new instance. |
||||
* |
||||
* @deprecated Creating an instance without a {@link StringEncoder} leads to a {@link NullPointerException}. Will be |
||||
* removed in 2.0. |
||||
*/ |
||||
@Deprecated |
||||
public StringEncoderComparator() { |
||||
this.stringEncoder = null; // Trying to use this will cause things to break
|
||||
} |
||||
|
||||
/** |
||||
* Constructs a new instance with the given algorithm. |
||||
* |
||||
* @param stringEncoder |
||||
* the StringEncoder used for comparisons. |
||||
*/ |
||||
public StringEncoderComparator(final StringEncoder stringEncoder) { |
||||
this.stringEncoder = stringEncoder; |
||||
} |
||||
|
||||
/** |
||||
* Compares two strings based not on the strings themselves, but on an encoding of the two strings using the |
||||
* StringEncoder this Comparator was created with. |
||||
* |
||||
* If an {@link EncoderException} is encountered, return <code>0</code>. |
||||
* |
||||
* @param o1 |
||||
* the object to compare |
||||
* @param o2 |
||||
* the object to compare to |
||||
* @return the Comparable.compareTo() return code or 0 if an encoding error was caught. |
||||
* @see Comparable |
||||
*/ |
||||
@Override |
||||
public int compare(final Object o1, final Object o2) { |
||||
|
||||
int compareCode = 0; |
||||
|
||||
try { |
||||
@SuppressWarnings("unchecked") // May fail with CCE if encode returns something that is not Comparable
|
||||
// However this was always the case.
|
||||
final Comparable<Comparable<?>> s1 = (Comparable<Comparable<?>>) this.stringEncoder.encode(o1); |
||||
final Comparable<?> s2 = (Comparable<?>) this.stringEncoder.encode(o2); |
||||
compareCode = s1.compareTo(s2); |
||||
} catch (final EncoderException ee) { |
||||
compareCode = 0; |
||||
} |
||||
return compareCode; |
||||
} |
||||
|
||||
} |
@ -0,0 +1,544 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.binary; |
||||
|
||||
/** |
||||
* Provides Base32 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>. |
||||
* |
||||
* <p> |
||||
* The class can be parameterized in the following manner with various constructors: |
||||
* </p> |
||||
* <ul> |
||||
* <li>Whether to use the "base32hex" variant instead of the default "base32"</li> |
||||
* <li>Line length: Default 76. Line length that aren't multiples of 8 will still essentially end up being multiples of |
||||
* 8 in the encoded data. |
||||
* <li>Line separator: Default is CRLF ("\r\n")</li> |
||||
* </ul> |
||||
* <p> |
||||
* This class operates directly on byte streams, and not character streams. |
||||
* </p> |
||||
* <p> |
||||
* This class is thread-safe. |
||||
* </p> |
||||
* |
||||
* @see <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a> |
||||
* |
||||
* @since 1.5 |
||||
* @version $Id: Base32.java 1809441 2017-09-23 16:41:53Z ggregory $ |
||||
*/ |
||||
public class Base32 extends BaseNCodec { |
||||
|
||||
/** |
||||
* BASE32 characters are 5 bits in length. |
||||
* They are formed by taking a block of five octets to form a 40-bit string, |
||||
* which is converted into eight BASE32 characters. |
||||
*/ |
||||
private static final int BITS_PER_ENCODED_BYTE = 5; |
||||
private static final int BYTES_PER_ENCODED_BLOCK = 8; |
||||
private static final int BYTES_PER_UNENCODED_BLOCK = 5; |
||||
|
||||
/** |
||||
* Chunk separator per RFC 2045 section 2.1. |
||||
* |
||||
* @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a> |
||||
*/ |
||||
private static final byte[] CHUNK_SEPARATOR = {'\r', '\n'}; |
||||
|
||||
/** |
||||
* This array is a lookup table that translates Unicode characters drawn from the "Base32 Alphabet" (as specified |
||||
* in Table 3 of RFC 4648) into their 5-bit positive integer equivalents. Characters that are not in the Base32 |
||||
* alphabet but fall within the bounds of the array are translated to -1. |
||||
*/ |
||||
private static final byte[] DECODE_TABLE = { |
||||
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
|
||||
-1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, // 30-3f 2-7
|
||||
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40-4f A-O
|
||||
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 50-5a P-Z
|
||||
-1, -1, -1, -1, -1, // 5b - 5f
|
||||
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 60 - 6f a-o
|
||||
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 70 - 7a p-z/**/
|
||||
}; |
||||
|
||||
/** |
||||
* This array is a lookup table that translates 5-bit positive integer index values into their "Base32 Alphabet" |
||||
* equivalents as specified in Table 3 of RFC 4648. |
||||
*/ |
||||
private static final byte[] ENCODE_TABLE = { |
||||
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', |
||||
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', |
||||
'2', '3', '4', '5', '6', '7', |
||||
}; |
||||
|
||||
/** |
||||
* This array is a lookup table that translates Unicode characters drawn from the "Base32 Hex Alphabet" (as |
||||
* specified in Table 4 of RFC 4648) into their 5-bit positive integer equivalents. Characters that are not in the |
||||
* Base32 Hex alphabet but fall within the bounds of the array are translated to -1. |
||||
*/ |
||||
private static final byte[] HEX_DECODE_TABLE = { |
||||
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 2-7
|
||||
-1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 40-4f A-O
|
||||
25, 26, 27, 28, 29, 30, 31, // 50-56 P-V
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, // 57-5f Z-_
|
||||
-1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 60-6f `-o
|
||||
25, 26, 27, 28, 29, 30, 31 // 70-76 p-v
|
||||
}; |
||||
|
||||
/** |
||||
* This array is a lookup table that translates 5-bit positive integer index values into their |
||||
* "Base32 Hex Alphabet" equivalents as specified in Table 4 of RFC 4648. |
||||
*/ |
||||
private static final byte[] HEX_ENCODE_TABLE = { |
||||
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', |
||||
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', |
||||
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', |
||||
}; |
||||
|
||||
/** Mask used to extract 5 bits, used when encoding Base32 bytes */ |
||||
private static final int MASK_5BITS = 0x1f; |
||||
|
||||
// The static final fields above are used for the original static byte[] methods on Base32.
|
||||
// The private member fields below are used with the new streaming approach, which requires
|
||||
// some state be preserved between calls of encode() and decode().
|
||||
|
||||
/** |
||||
* Place holder for the bytes we're dealing with for our based logic. |
||||
* Bitwise operations store and extract the encoding or decoding from this variable. |
||||
*/ |
||||
|
||||
/** |
||||
* Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. |
||||
* <code>decodeSize = {@link #BYTES_PER_ENCODED_BLOCK} - 1 + lineSeparator.length;</code> |
||||
*/ |
||||
private final int decodeSize; |
||||
|
||||
/** |
||||
* Decode table to use. |
||||
*/ |
||||
private final byte[] decodeTable; |
||||
|
||||
/** |
||||
* Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. |
||||
* <code>encodeSize = {@link #BYTES_PER_ENCODED_BLOCK} + lineSeparator.length;</code> |
||||
*/ |
||||
private final int encodeSize; |
||||
|
||||
/** |
||||
* Encode table to use. |
||||
*/ |
||||
private final byte[] encodeTable; |
||||
|
||||
/** |
||||
* Line separator for encoding. Not used when decoding. Only used if lineLength > 0. |
||||
*/ |
||||
private final byte[] lineSeparator; |
||||
|
||||
/** |
||||
* Creates a Base32 codec used for decoding and encoding. |
||||
* <p> |
||||
* When encoding the line length is 0 (no chunking). |
||||
* </p> |
||||
* |
||||
*/ |
||||
public Base32() { |
||||
this(false); |
||||
} |
||||
|
||||
/** |
||||
* Creates a Base32 codec used for decoding and encoding. |
||||
* <p> |
||||
* When encoding the line length is 0 (no chunking). |
||||
* </p> |
||||
* @param pad byte used as padding byte. |
||||
*/ |
||||
public Base32(final byte pad) { |
||||
this(false, pad); |
||||
} |
||||
|
||||
/** |
||||
* Creates a Base32 codec used for decoding and encoding. |
||||
* <p> |
||||
* When encoding the line length is 0 (no chunking). |
||||
* </p> |
||||
* @param useHex if {@code true} then use Base32 Hex alphabet |
||||
*/ |
||||
public Base32(final boolean useHex) { |
||||
this(0, null, useHex, PAD_DEFAULT); |
||||
} |
||||
|
||||
/** |
||||
* Creates a Base32 codec used for decoding and encoding. |
||||
* <p> |
||||
* When encoding the line length is 0 (no chunking). |
||||
* </p> |
||||
* @param useHex if {@code true} then use Base32 Hex alphabet |
||||
* @param pad byte used as padding byte. |
||||
*/ |
||||
public Base32(final boolean useHex, final byte pad) { |
||||
this(0, null, useHex, pad); |
||||
} |
||||
|
||||
/** |
||||
* Creates a Base32 codec used for decoding and encoding. |
||||
* <p> |
||||
* When encoding the line length is given in the constructor, the line separator is CRLF. |
||||
* </p> |
||||
* |
||||
* @param lineLength |
||||
* Each line of encoded data will be at most of the given length (rounded down to nearest multiple of |
||||
* 8). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when |
||||
* decoding. |
||||
*/ |
||||
public Base32(final int lineLength) { |
||||
this(lineLength, CHUNK_SEPARATOR); |
||||
} |
||||
|
||||
/** |
||||
* Creates a Base32 codec used for decoding and encoding. |
||||
* <p> |
||||
* When encoding the line length and line separator are given in the constructor. |
||||
* </p> |
||||
* <p> |
||||
* Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data. |
||||
* </p> |
||||
* |
||||
* @param lineLength |
||||
* Each line of encoded data will be at most of the given length (rounded down to nearest multiple of |
||||
* 8). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when |
||||
* decoding. |
||||
* @param lineSeparator |
||||
* Each line of encoded data will end with this sequence of bytes. |
||||
* @throws IllegalArgumentException |
||||
* The provided lineSeparator included some Base32 characters. That's not going to work! |
||||
*/ |
||||
public Base32(final int lineLength, final byte[] lineSeparator) { |
||||
this(lineLength, lineSeparator, false, PAD_DEFAULT); |
||||
} |
||||
|
||||
/** |
||||
* Creates a Base32 / Base32 Hex codec used for decoding and encoding. |
||||
* <p> |
||||
* When encoding the line length and line separator are given in the constructor. |
||||
* </p> |
||||
* <p> |
||||
* Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data. |
||||
* </p> |
||||
* |
||||
* @param lineLength |
||||
* Each line of encoded data will be at most of the given length (rounded down to nearest multiple of |
||||
* 8). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when |
||||
* decoding. |
||||
* @param lineSeparator |
||||
* Each line of encoded data will end with this sequence of bytes. |
||||
* @param useHex |
||||
* if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet |
||||
* @throws IllegalArgumentException |
||||
* The provided lineSeparator included some Base32 characters. That's not going to work! Or the |
||||
* lineLength > 0 and lineSeparator is null. |
||||
*/ |
||||
public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex) { |
||||
this(lineLength, lineSeparator, useHex, PAD_DEFAULT); |
||||
} |
||||
|
||||
/** |
||||
* Creates a Base32 / Base32 Hex codec used for decoding and encoding. |
||||
* <p> |
||||
* When encoding the line length and line separator are given in the constructor. |
||||
* </p> |
||||
* <p> |
||||
* Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data. |
||||
* </p> |
||||
* |
||||
* @param lineLength |
||||
* Each line of encoded data will be at most of the given length (rounded down to nearest multiple of |
||||
* 8). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when |
||||
* decoding. |
||||
* @param lineSeparator |
||||
* Each line of encoded data will end with this sequence of bytes. |
||||
* @param useHex |
||||
* if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet |
||||
* @param pad byte used as padding byte. |
||||
* @throws IllegalArgumentException |
||||
* The provided lineSeparator included some Base32 characters. That's not going to work! Or the |
||||
* lineLength > 0 and lineSeparator is null. |
||||
*/ |
||||
public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex, final byte pad) { |
||||
super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, lineLength, |
||||
lineSeparator == null ? 0 : lineSeparator.length, pad); |
||||
if (useHex) { |
||||
this.encodeTable = HEX_ENCODE_TABLE; |
||||
this.decodeTable = HEX_DECODE_TABLE; |
||||
} else { |
||||
this.encodeTable = ENCODE_TABLE; |
||||
this.decodeTable = DECODE_TABLE; |
||||
} |
||||
if (lineLength > 0) { |
||||
if (lineSeparator == null) { |
||||
throw new IllegalArgumentException("lineLength " + lineLength + " > 0, but lineSeparator is null"); |
||||
} |
||||
// Must be done after initializing the tables
|
||||
if (containsAlphabetOrPad(lineSeparator)) { |
||||
final String sep = StringUtils.newStringUtf8(lineSeparator); |
||||
throw new IllegalArgumentException("lineSeparator must not contain Base32 characters: [" + sep + "]"); |
||||
} |
||||
this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length; |
||||
this.lineSeparator = new byte[lineSeparator.length]; |
||||
System.arraycopy(lineSeparator, 0, this.lineSeparator, 0, lineSeparator.length); |
||||
} else { |
||||
this.encodeSize = BYTES_PER_ENCODED_BLOCK; |
||||
this.lineSeparator = null; |
||||
} |
||||
this.decodeSize = this.encodeSize - 1; |
||||
|
||||
if (isInAlphabet(pad) || isWhiteSpace(pad)) { |
||||
throw new IllegalArgumentException("pad must not be in alphabet or whitespace"); |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* <p> |
||||
* Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once |
||||
* with the data to decode, and once with inAvail set to "-1" to alert decoder that EOF has been reached. The "-1" |
||||
* call is not necessary when decoding, but it doesn't hurt, either. |
||||
* </p> |
||||
* <p> |
||||
* Ignores all non-Base32 characters. This is how chunked (e.g. 76 character) data is handled, since CR and LF are |
||||
* silently ignored, but has implications for other bytes, too. This method subscribes to the garbage-in, |
||||
* garbage-out philosophy: it will not check the provided data for validity. |
||||
* </p> |
||||
* |
||||
* @param in |
||||
* byte[] array of ascii data to Base32 decode. |
||||
* @param inPos |
||||
* Position to start reading data from. |
||||
* @param inAvail |
||||
* Amount of bytes available from input for encoding. |
||||
* @param context the context to be used |
||||
* |
||||
* Output is written to {@link Context#buffer} as 8-bit octets, using {@link Context#pos} as the buffer position |
||||
*/ |
||||
@Override |
||||
void decode(final byte[] in, int inPos, final int inAvail, final Context context) { |
||||
// package protected for access from I/O streams
|
||||
|
||||
if (context.eof) { |
||||
return; |
||||
} |
||||
if (inAvail < 0) { |
||||
context.eof = true; |
||||
} |
||||
for (int i = 0; i < inAvail; i++) { |
||||
final byte b = in[inPos++]; |
||||
if (b == pad) { |
||||
// We're done.
|
||||
context.eof = true; |
||||
break; |
||||
} |
||||
final byte[] buffer = ensureBufferSize(decodeSize, context); |
||||
if (b >= 0 && b < this.decodeTable.length) { |
||||
final int result = this.decodeTable[b]; |
||||
if (result >= 0) { |
||||
context.modulus = (context.modulus+1) % BYTES_PER_ENCODED_BLOCK; |
||||
// collect decoded bytes
|
||||
context.lbitWorkArea = (context.lbitWorkArea << BITS_PER_ENCODED_BYTE) + result; |
||||
if (context.modulus == 0) { // we can output the 5 bytes
|
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 32) & MASK_8BITS); |
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 24) & MASK_8BITS); |
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS); |
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS); |
||||
buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS); |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
// Two forms of EOF as far as Base32 decoder is concerned: actual
|
||||
// EOF (-1) and first time '=' character is encountered in stream.
|
||||
// This approach makes the '=' padding characters completely optional.
|
||||
if (context.eof && context.modulus >= 2) { // if modulus < 2, nothing to do
|
||||
final byte[] buffer = ensureBufferSize(decodeSize, context); |
||||
|
||||
// we ignore partial bytes, i.e. only multiples of 8 count
|
||||
switch (context.modulus) { |
||||
case 2 : // 10 bits, drop 2 and output one byte
|
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 2) & MASK_8BITS); |
||||
break; |
||||
case 3 : // 15 bits, drop 7 and output 1 byte
|
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 7) & MASK_8BITS); |
||||
break; |
||||
case 4 : // 20 bits = 2*8 + 4
|
||||
context.lbitWorkArea = context.lbitWorkArea >> 4; // drop 4 bits
|
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS); |
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS); |
||||
break; |
||||
case 5 : // 25bits = 3*8 + 1
|
||||
context.lbitWorkArea = context.lbitWorkArea >> 1; |
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS); |
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS); |
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS); |
||||
break; |
||||
case 6 : // 30bits = 3*8 + 6
|
||||
context.lbitWorkArea = context.lbitWorkArea >> 6; |
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS); |
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS); |
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS); |
||||
break; |
||||
case 7 : // 35 = 4*8 +3
|
||||
context.lbitWorkArea = context.lbitWorkArea >> 3; |
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 24) & MASK_8BITS); |
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS); |
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS); |
||||
buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS); |
||||
break; |
||||
default: |
||||
// modulus can be 0-7, and we excluded 0,1 already
|
||||
throw new IllegalStateException("Impossible modulus "+context.modulus); |
||||
} |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* <p> |
||||
* Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with |
||||
* the data to encode, and once with inAvail set to "-1" to alert encoder that EOF has been reached, so flush last |
||||
* remaining bytes (if not multiple of 5). |
||||
* </p> |
||||
* |
||||
* @param in |
||||
* byte[] array of binary data to Base32 encode. |
||||
* @param inPos |
||||
* Position to start reading data from. |
||||
* @param inAvail |
||||
* Amount of bytes available from input for encoding. |
||||
* @param context the context to be used |
||||
*/ |
||||
@Override |
||||
void encode(final byte[] in, int inPos, final int inAvail, final Context context) { |
||||
// package protected for access from I/O streams
|
||||
|
||||
if (context.eof) { |
||||
return; |
||||
} |
||||
// inAvail < 0 is how we're informed of EOF in the underlying data we're
|
||||
// encoding.
|
||||
if (inAvail < 0) { |
||||
context.eof = true; |
||||
if (0 == context.modulus && lineLength == 0) { |
||||
return; // no leftovers to process and not using chunking
|
||||
} |
||||
final byte[] buffer = ensureBufferSize(encodeSize, context); |
||||
final int savedPos = context.pos; |
||||
switch (context.modulus) { // % 5
|
||||
case 0 : |
||||
break; |
||||
case 1 : // Only 1 octet; take top 5 bits then remainder
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 3) & MASK_5BITS]; // 8-1*5 = 3
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea << 2) & MASK_5BITS]; // 5-3=2
|
||||
buffer[context.pos++] = pad; |
||||
buffer[context.pos++] = pad; |
||||
buffer[context.pos++] = pad; |
||||
buffer[context.pos++] = pad; |
||||
buffer[context.pos++] = pad; |
||||
buffer[context.pos++] = pad; |
||||
break; |
||||
case 2 : // 2 octets = 16 bits to use
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 11) & MASK_5BITS]; // 16-1*5 = 11
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 6) & MASK_5BITS]; // 16-2*5 = 6
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 1) & MASK_5BITS]; // 16-3*5 = 1
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea << 4) & MASK_5BITS]; // 5-1 = 4
|
||||
buffer[context.pos++] = pad; |
||||
buffer[context.pos++] = pad; |
||||
buffer[context.pos++] = pad; |
||||
buffer[context.pos++] = pad; |
||||
break; |
||||
case 3 : // 3 octets = 24 bits to use
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 19) & MASK_5BITS]; // 24-1*5 = 19
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 14) & MASK_5BITS]; // 24-2*5 = 14
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 9) & MASK_5BITS]; // 24-3*5 = 9
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 4) & MASK_5BITS]; // 24-4*5 = 4
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea << 1) & MASK_5BITS]; // 5-4 = 1
|
||||
buffer[context.pos++] = pad; |
||||
buffer[context.pos++] = pad; |
||||
buffer[context.pos++] = pad; |
||||
break; |
||||
case 4 : // 4 octets = 32 bits to use
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 27) & MASK_5BITS]; // 32-1*5 = 27
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 22) & MASK_5BITS]; // 32-2*5 = 22
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 17) & MASK_5BITS]; // 32-3*5 = 17
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 12) & MASK_5BITS]; // 32-4*5 = 12
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 7) & MASK_5BITS]; // 32-5*5 = 7
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 2) & MASK_5BITS]; // 32-6*5 = 2
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea << 3) & MASK_5BITS]; // 5-2 = 3
|
||||
buffer[context.pos++] = pad; |
||||
break; |
||||
default: |
||||
throw new IllegalStateException("Impossible modulus "+context.modulus); |
||||
} |
||||
context.currentLinePos += context.pos - savedPos; // keep track of current line position
|
||||
// if currentPos == 0 we are at the start of a line, so don't add CRLF
|
||||
if (lineLength > 0 && context.currentLinePos > 0){ // add chunk separator if required
|
||||
System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length); |
||||
context.pos += lineSeparator.length; |
||||
} |
||||
} else { |
||||
for (int i = 0; i < inAvail; i++) { |
||||
final byte[] buffer = ensureBufferSize(encodeSize, context); |
||||
context.modulus = (context.modulus+1) % BYTES_PER_UNENCODED_BLOCK; |
||||
int b = in[inPos++]; |
||||
if (b < 0) { |
||||
b += 256; |
||||
} |
||||
context.lbitWorkArea = (context.lbitWorkArea << 8) + b; // BITS_PER_BYTE
|
||||
if (0 == context.modulus) { // we have enough bytes to create our output
|
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 35) & MASK_5BITS]; |
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 30) & MASK_5BITS]; |
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 25) & MASK_5BITS]; |
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 20) & MASK_5BITS]; |
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 15) & MASK_5BITS]; |
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 10) & MASK_5BITS]; |
||||
buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 5) & MASK_5BITS]; |
||||
buffer[context.pos++] = encodeTable[(int)context.lbitWorkArea & MASK_5BITS]; |
||||
context.currentLinePos += BYTES_PER_ENCODED_BLOCK; |
||||
if (lineLength > 0 && lineLength <= context.currentLinePos) { |
||||
System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length); |
||||
context.pos += lineSeparator.length; |
||||
context.currentLinePos = 0; |
||||
} |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Returns whether or not the {@code octet} is in the Base32 alphabet. |
||||
* |
||||
* @param octet |
||||
* The value to test |
||||
* @return {@code true} if the value is defined in the the Base32 alphabet {@code false} otherwise. |
||||
*/ |
||||
@Override |
||||
public boolean isInAlphabet(final byte octet) { |
||||
return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1; |
||||
} |
||||
} |
@ -0,0 +1,85 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.binary; |
||||
|
||||
import java.io.InputStream; |
||||
|
||||
/** |
||||
* Provides Base32 encoding and decoding in a streaming fashion (unlimited size). When encoding the default lineLength |
||||
* is 76 characters and the default lineEnding is CRLF, but these can be overridden by using the appropriate |
||||
* constructor. |
||||
* <p> |
||||
* The default behaviour of the Base32InputStream is to DECODE, whereas the default behaviour of the Base32OutputStream |
||||
* is to ENCODE, but this behaviour can be overridden by using a different constructor. |
||||
* </p> |
||||
* <p> |
||||
* Since this class operates directly on byte streams, and not character streams, it is hard-coded to only encode/decode |
||||
* character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8, etc). |
||||
* </p> |
||||
* |
||||
* @version $Id: Base32InputStream.java 1586299 2014-04-10 13:50:21Z ggregory $ |
||||
* @see <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a> |
||||
* @since 1.5 |
||||
*/ |
||||
public class Base32InputStream extends BaseNCodecInputStream { |
||||
|
||||
/** |
||||
* Creates a Base32InputStream such that all data read is Base32-decoded from the original provided InputStream. |
||||
* |
||||
* @param in |
||||
* InputStream to wrap. |
||||
*/ |
||||
public Base32InputStream(final InputStream in) { |
||||
this(in, false); |
||||
} |
||||
|
||||
/** |
||||
* Creates a Base32InputStream such that all data read is either Base32-encoded or Base32-decoded from the original |
||||
* provided InputStream. |
||||
* |
||||
* @param in |
||||
* InputStream to wrap. |
||||
* @param doEncode |
||||
* true if we should encode all data read from us, false if we should decode. |
||||
*/ |
||||
public Base32InputStream(final InputStream in, final boolean doEncode) { |
||||
super(in, new Base32(false), doEncode); |
||||
} |
||||
|
||||
/** |
||||
* Creates a Base32InputStream such that all data read is either Base32-encoded or Base32-decoded from the original |
||||
* provided InputStream. |
||||
* |
||||
* @param in |
||||
* InputStream to wrap. |
||||
* @param doEncode |
||||
* true if we should encode all data read from us, false if we should decode. |
||||
* @param lineLength |
||||
* If doEncode is true, each line of encoded data will contain lineLength characters (rounded down to |
||||
* nearest multiple of 4). If lineLength <= 0, the encoded data is not divided into lines. If doEncode |
||||
* is false, lineLength is ignored. |
||||
* @param lineSeparator |
||||
* If doEncode is true, each line of encoded data will be terminated with this byte sequence (e.g. \r\n). |
||||
* If lineLength <= 0, the lineSeparator is not used. If doEncode is false lineSeparator is ignored. |
||||
*/ |
||||
public Base32InputStream(final InputStream in, final boolean doEncode, |
||||
final int lineLength, final byte[] lineSeparator) { |
||||
super(in, new Base32(lineLength, lineSeparator), doEncode); |
||||
} |
||||
|
||||
} |
@ -0,0 +1,89 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.binary; |
||||
|
||||
import java.io.OutputStream; |
||||
|
||||
/** |
||||
* Provides Base32 encoding and decoding in a streaming fashion (unlimited size). When encoding the default lineLength |
||||
* is 76 characters and the default lineEnding is CRLF, but these can be overridden by using the appropriate |
||||
* constructor. |
||||
* <p> |
||||
* The default behaviour of the Base32OutputStream is to ENCODE, whereas the default behaviour of the Base32InputStream |
||||
* is to DECODE. But this behaviour can be overridden by using a different constructor. |
||||
* </p> |
||||
* <p> |
||||
* Since this class operates directly on byte streams, and not character streams, it is hard-coded to only encode/decode |
||||
* character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8, etc). |
||||
* </p> |
||||
* <p> |
||||
* <b>Note:</b> It is mandatory to close the stream after the last byte has been written to it, otherwise the |
||||
* final padding will be omitted and the resulting data will be incomplete/inconsistent. |
||||
* </p> |
||||
* |
||||
* @version $Id: Base32OutputStream.java 1635952 2014-11-01 14:19:04Z tn $ |
||||
* @see <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a> |
||||
* @since 1.5 |
||||
*/ |
||||
public class Base32OutputStream extends BaseNCodecOutputStream { |
||||
|
||||
/** |
||||
* Creates a Base32OutputStream such that all data written is Base32-encoded to the original provided OutputStream. |
||||
* |
||||
* @param out |
||||
* OutputStream to wrap. |
||||
*/ |
||||
public Base32OutputStream(final OutputStream out) { |
||||
this(out, true); |
||||
} |
||||
|
||||
/** |
||||
* Creates a Base32OutputStream such that all data written is either Base32-encoded or Base32-decoded to the |
||||
* original provided OutputStream. |
||||
* |
||||
* @param out |
||||
* OutputStream to wrap. |
||||
* @param doEncode |
||||
* true if we should encode all data written to us, false if we should decode. |
||||
*/ |
||||
public Base32OutputStream(final OutputStream out, final boolean doEncode) { |
||||
super(out, new Base32(false), doEncode); |
||||
} |
||||
|
||||
/** |
||||
* Creates a Base32OutputStream such that all data written is either Base32-encoded or Base32-decoded to the |
||||
* original provided OutputStream. |
||||
* |
||||
* @param out |
||||
* OutputStream to wrap. |
||||
* @param doEncode |
||||
* true if we should encode all data written to us, false if we should decode. |
||||
* @param lineLength |
||||
* If doEncode is true, each line of encoded data will contain lineLength characters (rounded down to |
||||
* nearest multiple of 4). If lineLength <= 0, the encoded data is not divided into lines. If doEncode |
||||
* is false, lineLength is ignored. |
||||
* @param lineSeparator |
||||
* If doEncode is true, each line of encoded data will be terminated with this byte sequence (e.g. \r\n). |
||||
* If lineLength <= 0, the lineSeparator is not used. If doEncode is false lineSeparator is ignored. |
||||
*/ |
||||
public Base32OutputStream(final OutputStream out, final boolean doEncode, |
||||
final int lineLength, final byte[] lineSeparator) { |
||||
super(out, new Base32(lineLength, lineSeparator), doEncode); |
||||
} |
||||
|
||||
} |
@ -0,0 +1,785 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.binary; |
||||
|
||||
import java.math.BigInteger; |
||||
|
||||
/** |
||||
* Provides Base64 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>. |
||||
* |
||||
* <p> |
||||
* This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose |
||||
* Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein. |
||||
* </p> |
||||
* <p> |
||||
* The class can be parameterized in the following manner with various constructors: |
||||
* </p> |
||||
* <ul> |
||||
* <li>URL-safe mode: Default off.</li> |
||||
* <li>Line length: Default 76. Line lengths that aren't multiples of 4 will still essentially end up being multiples of |
||||
* 4 in the encoded data.</li> |
||||
* <li>Line separator: Default is CRLF ("\r\n")</li> |
||||
* </ul> |
||||
* <p> |
||||
* The URL-safe parameter is only applied to encode operations. Decoding seamlessly handles both modes. |
||||
* </p> |
||||
* <p> |
||||
* Since this class operates directly on byte streams, and not character streams, it is hard-coded to only |
||||
* encode/decode character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, |
||||
* UTF-8, etc). |
||||
* </p> |
||||
* <p> |
||||
* This class is thread-safe. |
||||
* </p> |
||||
* |
||||
* @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a> |
||||
* @since 1.0 |
||||
* @version $Id: Base64.java 1789158 2017-03-28 15:04:58Z sebb $ |
||||
*/ |
||||
public class Base64 extends BaseNCodec { |
||||
|
||||
/** |
||||
* BASE64 characters are 6 bits in length. |
||||
* They are formed by taking a block of 3 octets to form a 24-bit string, |
||||
* which is converted into 4 BASE64 characters. |
||||
*/ |
||||
private static final int BITS_PER_ENCODED_BYTE = 6; |
||||
private static final int BYTES_PER_UNENCODED_BLOCK = 3; |
||||
private static final int BYTES_PER_ENCODED_BLOCK = 4; |
||||
|
||||
/** |
||||
* Chunk separator per RFC 2045 section 2.1. |
||||
* |
||||
* <p> |
||||
* N.B. The next major release may break compatibility and make this field private. |
||||
* </p> |
||||
* |
||||
* @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a> |
||||
*/ |
||||
static final byte[] CHUNK_SEPARATOR = {'\r', '\n'}; |
||||
|
||||
/** |
||||
* This array is a lookup table that translates 6-bit positive integer index values into their "Base64 Alphabet" |
||||
* equivalents as specified in Table 1 of RFC 2045. |
||||
* |
||||
* Thanks to "commons" project in ws.apache.org for this code. |
||||
* http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
|
||||
*/ |
||||
private static final byte[] STANDARD_ENCODE_TABLE = { |
||||
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', |
||||
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', |
||||
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', |
||||
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', |
||||
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/' |
||||
}; |
||||
|
||||
/** |
||||
* This is a copy of the STANDARD_ENCODE_TABLE above, but with + and / |
||||
* changed to - and _ to make the encoded Base64 results more URL-SAFE. |
||||
* This table is only used when the Base64's mode is set to URL-SAFE. |
||||
*/ |
||||
private static final byte[] URL_SAFE_ENCODE_TABLE = { |
||||
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', |
||||
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', |
||||
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', |
||||
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', |
||||
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_' |
||||
}; |
||||
|
||||
/** |
||||
* This array is a lookup table that translates Unicode characters drawn from the "Base64 Alphabet" (as specified |
||||
* in Table 1 of RFC 2045) into their 6-bit positive integer equivalents. Characters that are not in the Base64 |
||||
* alphabet but fall within the bounds of the array are translated to -1. |
||||
* |
||||
* Note: '+' and '-' both decode to 62. '/' and '_' both decode to 63. This means decoder seamlessly handles both |
||||
* URL_SAFE and STANDARD base64. (The encoder, on the other hand, needs to know ahead of time what to emit). |
||||
* |
||||
* Thanks to "commons" project in ws.apache.org for this code. |
||||
* http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
|
||||
*/ |
||||
private static final byte[] DECODE_TABLE = { |
||||
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63, // 20-2f + - /
|
||||
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
|
||||
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40-4f A-O
|
||||
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63, // 50-5f P-Z _
|
||||
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 60-6f a-o
|
||||
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 // 70-7a p-z
|
||||
}; |
||||
|
||||
/** |
||||
* Base64 uses 6-bit fields. |
||||
*/ |
||||
/** Mask used to extract 6 bits, used when encoding */ |
||||
private static final int MASK_6BITS = 0x3f; |
||||
|
||||
// The static final fields above are used for the original static byte[] methods on Base64.
|
||||
// The private member fields below are used with the new streaming approach, which requires
|
||||
// some state be preserved between calls of encode() and decode().
|
||||
|
||||
/** |
||||
* Encode table to use: either STANDARD or URL_SAFE. Note: the DECODE_TABLE above remains static because it is able |
||||
* to decode both STANDARD and URL_SAFE streams, but the encodeTable must be a member variable so we can switch |
||||
* between the two modes. |
||||
*/ |
||||
private final byte[] encodeTable; |
||||
|
||||
// Only one decode table currently; keep for consistency with Base32 code
|
||||
private final byte[] decodeTable = DECODE_TABLE; |
||||
|
||||
/** |
||||
* Line separator for encoding. Not used when decoding. Only used if lineLength > 0. |
||||
*/ |
||||
private final byte[] lineSeparator; |
||||
|
||||
/** |
||||
* Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. |
||||
* <code>decodeSize = 3 + lineSeparator.length;</code> |
||||
*/ |
||||
private final int decodeSize; |
||||
|
||||
/** |
||||
* Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. |
||||
* <code>encodeSize = 4 + lineSeparator.length;</code> |
||||
*/ |
||||
private final int encodeSize; |
||||
|
||||
/** |
||||
* Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode. |
||||
* <p> |
||||
* When encoding the line length is 0 (no chunking), and the encoding table is STANDARD_ENCODE_TABLE. |
||||
* </p> |
||||
* |
||||
* <p> |
||||
* When decoding all variants are supported. |
||||
* </p> |
||||
*/ |
||||
public Base64() {
    // Delegate with a line length of 0, i.e. the encoded output is not split into lines.
    this(0);
}
||||
|
||||
/** |
||||
* Creates a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode. |
||||
* <p> |
||||
* When encoding the line length is 76, the line separator is CRLF, and the encoding table is URL_SAFE_ENCODE_TABLE if urlSafe is true, STANDARD_ENCODE_TABLE otherwise. |
||||
* </p> |
||||
* |
||||
* <p> |
||||
* When decoding all variants are supported. |
||||
* </p> |
||||
* |
||||
* @param urlSafe |
||||
* if <code>true</code>, URL-safe encoding is used. In most cases this should be set to |
||||
* <code>false</code>. |
||||
* @since 1.4 |
||||
*/ |
||||
public Base64(final boolean urlSafe) {
    // Chunked defaults (MIME_CHUNK_SIZE columns, CHUNK_SEPARATOR line ends) with the caller's alphabet choice.
    this(MIME_CHUNK_SIZE, CHUNK_SEPARATOR, urlSafe);
}
||||
|
||||
/** |
||||
* Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode. |
||||
* <p> |
||||
* When encoding the line length is given in the constructor, the line separator is CRLF, and the encoding table is |
||||
* STANDARD_ENCODE_TABLE. |
||||
* </p> |
||||
* <p> |
||||
* Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data. |
||||
* </p> |
||||
* <p> |
||||
* When decoding all variants are supported. |
||||
* </p> |
||||
* |
||||
* @param lineLength |
||||
* Each line of encoded data will be at most of the given length (rounded down to nearest multiple of |
||||
* 4). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when |
||||
* decoding. |
||||
* @since 1.4 |
||||
*/ |
||||
public Base64(final int lineLength) {
    // Caller-chosen line length, default CHUNK_SEPARATOR as the line ending.
    this(lineLength, CHUNK_SEPARATOR);
}
||||
|
||||
/** |
||||
* Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode. |
||||
* <p> |
||||
* When encoding the line length and line separator are given in the constructor, and the encoding table is |
||||
* STANDARD_ENCODE_TABLE. |
||||
* </p> |
||||
* <p> |
||||
* Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data. |
||||
* </p> |
||||
* <p> |
||||
* When decoding all variants are supported. |
||||
* </p> |
||||
* |
||||
* @param lineLength |
||||
* Each line of encoded data will be at most of the given length (rounded down to nearest multiple of |
||||
* 4). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when |
||||
* decoding. |
||||
* @param lineSeparator |
||||
* Each line of encoded data will end with this sequence of bytes. |
||||
* @throws IllegalArgumentException |
||||
* Thrown when the provided lineSeparator included some base64 characters. |
||||
* @since 1.4 |
||||
*/ |
||||
public Base64(final int lineLength, final byte[] lineSeparator) {
    // urlSafe == false: the standard '+' and '/' alphabet is used when encoding.
    this(lineLength, lineSeparator, false);
}
||||
|
||||
/** |
||||
* Creates a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode. |
||||
* <p> |
||||
* When encoding the line length and line separator are given in the constructor, and the encoding table is |
||||
* URL_SAFE_ENCODE_TABLE if urlSafe is true, STANDARD_ENCODE_TABLE otherwise. |
||||
* </p> |
||||
* <p> |
||||
* Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data. |
||||
* </p> |
||||
* <p> |
||||
* When decoding all variants are supported. |
||||
* </p> |
||||
* |
||||
* @param lineLength |
||||
* Each line of encoded data will be at most of the given length (rounded down to nearest multiple of |
||||
* 4). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when |
||||
* decoding. |
||||
* @param lineSeparator |
||||
* Each line of encoded data will end with this sequence of bytes. |
||||
* @param urlSafe |
||||
* Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode |
||||
* operations. Decoding seamlessly handles both modes. |
||||
* <b>Note: no padding is added when using the URL-safe alphabet.</b> |
||||
* @throws IllegalArgumentException |
||||
* The provided lineSeparator included some base64 characters. That's not going to work! |
||||
* @since 1.4 |
||||
*/ |
||||
public Base64(final int lineLength, final byte[] lineSeparator, final boolean urlSafe) {
    super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK,
            lineLength,
            lineSeparator == null ? 0 : lineSeparator.length);

    // A separator is validated whenever one is supplied, even if chunking ends up disabled
    // (lineLength <= 0); a null separator is never validated.
    if (lineSeparator != null && containsAlphabetOrPad(lineSeparator)) {
        final String sep = StringUtils.newStringUtf8(lineSeparator);
        throw new IllegalArgumentException("lineSeparator must not contain base64 characters: [" + sep + "]");
    }

    // Chunking needs both a separator and a positive line length; a null separator
    // silently disables chunking instead of raising an exception.
    final boolean chunking = lineSeparator != null && lineLength > 0;
    if (chunking) {
        this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length;
        // Defensive copy so later changes to the caller's array cannot affect this codec.
        this.lineSeparator = lineSeparator.clone();
    } else {
        this.encodeSize = BYTES_PER_ENCODED_BLOCK;
        this.lineSeparator = null;
    }

    this.decodeSize = this.encodeSize - 1;
    this.encodeTable = urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE;
}
||||
|
||||
/** |
||||
* Returns our current encode mode. True if we're URL-SAFE, false otherwise. |
||||
* |
||||
* @return true if we're in URL-SAFE mode, false otherwise. |
||||
* @since 1.4 |
||||
*/ |
||||
public boolean isUrlSafe() { |
||||
return this.encodeTable == URL_SAFE_ENCODE_TABLE; |
||||
} |
||||
|
||||
/** |
||||
* <p> |
||||
* Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with |
||||
* the data to encode, and once with inAvail set to "-1" to alert encoder that EOF has been reached, to flush last |
||||
* remaining bytes (if not multiple of 3). |
||||
* </p> |
||||
* <p><b>Note: no padding is added when encoding using the URL-safe alphabet.</b></p> |
||||
* <p> |
||||
* Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach. |
||||
* http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
|
||||
* </p> |
||||
* |
||||
* @param in |
||||
* byte[] array of binary data to base64 encode. |
||||
* @param inPos |
||||
* Position to start reading data from. |
||||
* @param inAvail |
||||
* Amount of bytes available from input for encoding. |
||||
* @param context |
||||
* the context to be used |
||||
*/ |
||||
@Override |
||||
void encode(final byte[] in, int inPos, final int inAvail, final Context context) { |
||||
if (context.eof) { |
||||
return; |
||||
} |
||||
// inAvail < 0 is how we're informed of EOF in the underlying data we're
|
||||
// encoding.
|
||||
if (inAvail < 0) { |
||||
context.eof = true; |
||||
if (0 == context.modulus && lineLength == 0) { |
||||
return; // no leftovers to process and not using chunking
|
||||
} |
||||
final byte[] buffer = ensureBufferSize(encodeSize, context); |
||||
final int savedPos = context.pos; |
||||
switch (context.modulus) { // 0-2
|
||||
case 0 : // nothing to do here
|
||||
break; |
||||
case 1 : // 8 bits = 6 + 2
|
||||
// top 6 bits:
|
||||
buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 2) & MASK_6BITS]; |
||||
// remaining 2:
|
||||
buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 4) & MASK_6BITS]; |
||||
// URL-SAFE skips the padding to further reduce size.
|
||||
if (encodeTable == STANDARD_ENCODE_TABLE) { |
||||
buffer[context.pos++] = pad; |
||||
buffer[context.pos++] = pad; |
||||
} |
||||
break; |
||||
|
||||
case 2 : // 16 bits = 6 + 6 + 4
|
||||
buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 10) & MASK_6BITS]; |
||||
buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 4) & MASK_6BITS]; |
||||
buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 2) & MASK_6BITS]; |
||||
// URL-SAFE skips the padding to further reduce size.
|
||||
if (encodeTable == STANDARD_ENCODE_TABLE) { |
||||
buffer[context.pos++] = pad; |
||||
} |
||||
break; |
||||
default: |
||||
throw new IllegalStateException("Impossible modulus "+context.modulus); |
||||
} |
||||
context.currentLinePos += context.pos - savedPos; // keep track of current line position
|
||||
// if currentPos == 0 we are at the start of a line, so don't add CRLF
|
||||
if (lineLength > 0 && context.currentLinePos > 0) { |
||||
System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length); |
||||
context.pos += lineSeparator.length; |
||||
} |
||||
} else { |
||||
for (int i = 0; i < inAvail; i++) { |
||||
final byte[] buffer = ensureBufferSize(encodeSize, context); |
||||
context.modulus = (context.modulus+1) % BYTES_PER_UNENCODED_BLOCK; |
||||
int b = in[inPos++]; |
||||
if (b < 0) { |
||||
b += 256; |
||||
} |
||||
context.ibitWorkArea = (context.ibitWorkArea << 8) + b; // BITS_PER_BYTE
|
||||
if (0 == context.modulus) { // 3 bytes = 24 bits = 4 * 6 bits to extract
|
||||
buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 18) & MASK_6BITS]; |
||||
buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 12) & MASK_6BITS]; |
||||
buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 6) & MASK_6BITS]; |
||||
buffer[context.pos++] = encodeTable[context.ibitWorkArea & MASK_6BITS]; |
||||
context.currentLinePos += BYTES_PER_ENCODED_BLOCK; |
||||
if (lineLength > 0 && lineLength <= context.currentLinePos) { |
||||
System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length); |
||||
context.pos += lineSeparator.length; |
||||
context.currentLinePos = 0; |
||||
} |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* <p> |
||||
* Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once |
||||
* with the data to decode, and once with inAvail set to "-1" to alert decoder that EOF has been reached. The "-1" |
||||
* call is not necessary when decoding, but it doesn't hurt, either. |
||||
* </p> |
||||
* <p> |
||||
* Ignores all non-base64 characters. This is how chunked (e.g. 76 character) data is handled, since CR and LF are |
||||
* silently ignored, but has implications for other bytes, too. This method subscribes to the garbage-in, |
||||
* garbage-out philosophy: it will not check the provided data for validity. |
||||
* </p> |
||||
* <p> |
||||
* Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach. |
||||
* http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
|
||||
* </p> |
||||
* |
||||
* @param in |
||||
* byte[] array of ascii data to base64 decode. |
||||
* @param inPos |
||||
* Position to start reading data from. |
||||
* @param inAvail |
||||
* Amount of bytes available from input for encoding. |
||||
* @param context |
||||
* the context to be used |
||||
*/ |
||||
@Override |
||||
void decode(final byte[] in, int inPos, final int inAvail, final Context context) { |
||||
if (context.eof) { |
||||
return; |
||||
} |
||||
if (inAvail < 0) { |
||||
context.eof = true; |
||||
} |
||||
for (int i = 0; i < inAvail; i++) { |
||||
final byte[] buffer = ensureBufferSize(decodeSize, context); |
||||
final byte b = in[inPos++]; |
||||
if (b == pad) { |
||||
// We're done.
|
||||
context.eof = true; |
||||
break; |
||||
} |
||||
if (b >= 0 && b < DECODE_TABLE.length) { |
||||
final int result = DECODE_TABLE[b]; |
||||
if (result >= 0) { |
||||
context.modulus = (context.modulus+1) % BYTES_PER_ENCODED_BLOCK; |
||||
context.ibitWorkArea = (context.ibitWorkArea << BITS_PER_ENCODED_BYTE) + result; |
||||
if (context.modulus == 0) { |
||||
buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 16) & MASK_8BITS); |
||||
buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS); |
||||
buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS); |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
// Two forms of EOF as far as base64 decoder is concerned: actual
|
||||
// EOF (-1) and first time '=' character is encountered in stream.
|
||||
// This approach makes the '=' padding characters completely optional.
|
||||
if (context.eof && context.modulus != 0) { |
||||
final byte[] buffer = ensureBufferSize(decodeSize, context); |
||||
|
||||
// We have some spare bits remaining
|
||||
// Output all whole multiples of 8 bits and ignore the rest
|
||||
switch (context.modulus) { |
||||
// case 0 : // impossible, as excluded above
|
||||
case 1 : // 6 bits - ignore entirely
|
||||
// TODO not currently tested; perhaps it is impossible?
|
||||
break; |
||||
case 2 : // 12 bits = 8 + 4
|
||||
context.ibitWorkArea = context.ibitWorkArea >> 4; // dump the extra 4 bits
|
||||
buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS); |
||||
break; |
||||
case 3 : // 18 bits = 8 + 8 + 2
|
||||
context.ibitWorkArea = context.ibitWorkArea >> 2; // dump 2 bits
|
||||
buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS); |
||||
buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS); |
||||
break; |
||||
default: |
||||
throw new IllegalStateException("Impossible modulus "+context.modulus); |
||||
} |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the |
||||
* method treats whitespace as valid. |
||||
* |
||||
* @param arrayOctet |
||||
* byte array to test |
||||
* @return <code>true</code> if all bytes are valid characters in the Base64 alphabet or if the byte array is empty; |
||||
* <code>false</code>, otherwise |
||||
* @deprecated 1.5 Use {@link #isBase64(byte[])}, will be removed in 2.0. |
||||
*/ |
||||
@Deprecated |
||||
public static boolean isArrayByteBase64(final byte[] arrayOctet) { |
||||
return isBase64(arrayOctet); |
||||
} |
||||
|
||||
/** |
||||
* Returns whether or not the <code>octet</code> is in the base 64 alphabet. |
||||
* |
||||
* @param octet |
||||
* The value to test |
||||
* @return <code>true</code> if the value is defined in the base 64 alphabet, <code>false</code> otherwise. |
||||
* @since 1.4 |
||||
*/ |
||||
public static boolean isBase64(final byte octet) { |
||||
return octet == PAD_DEFAULT || (octet >= 0 && octet < DECODE_TABLE.length && DECODE_TABLE[octet] != -1); |
||||
} |
||||
|
||||
/** |
||||
* Tests a given String to see if it contains only valid characters within the Base64 alphabet. Currently the |
||||
* method treats whitespace as valid. |
||||
* |
||||
* @param base64 |
||||
* String to test |
||||
* @return <code>true</code> if all characters in the String are valid characters in the Base64 alphabet or if |
||||
* the String is empty; <code>false</code>, otherwise |
||||
* @since 1.5 |
||||
*/ |
||||
public static boolean isBase64(final String base64) { |
||||
return isBase64(StringUtils.getBytesUtf8(base64)); |
||||
} |
||||
|
||||
/** |
||||
* Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the |
||||
* method treats whitespace as valid. |
||||
* |
||||
* @param arrayOctet |
||||
* byte array to test |
||||
* @return <code>true</code> if all bytes are valid characters in the Base64 alphabet or if the byte array is empty; |
||||
* <code>false</code>, otherwise |
||||
* @since 1.5 |
||||
*/ |
||||
public static boolean isBase64(final byte[] arrayOctet) { |
||||
for (int i = 0; i < arrayOctet.length; i++) { |
||||
if (!isBase64(arrayOctet[i]) && !isWhiteSpace(arrayOctet[i])) { |
||||
return false; |
||||
} |
||||
} |
||||
return true; |
||||
} |
||||
|
||||
/** |
||||
* Encodes binary data using the base64 algorithm but does not chunk the output. |
||||
* |
||||
* @param binaryData |
||||
* binary data to encode |
||||
* @return byte[] containing Base64 characters in their UTF-8 representation. |
||||
*/ |
||||
public static byte[] encodeBase64(final byte[] binaryData) { |
||||
return encodeBase64(binaryData, false); |
||||
} |
||||
|
||||
/** |
||||
* Encodes binary data using the base64 algorithm but does not chunk the output. |
||||
* |
||||
* NOTE: We changed the behaviour of this method from multi-line chunking (commons-codec-1.4) to |
||||
* single-line non-chunking (commons-codec-1.5). |
||||
* |
||||
* @param binaryData |
||||
* binary data to encode |
||||
* @return String containing Base64 characters. |
||||
* @since 1.4 (NOTE: 1.4 chunked the output, whereas 1.5 does not). |
||||
*/ |
||||
public static String encodeBase64String(final byte[] binaryData) { |
||||
return StringUtils.newStringUsAscii(encodeBase64(binaryData, false)); |
||||
} |
||||
|
||||
/** |
||||
* Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The |
||||
* url-safe variation emits - and _ instead of + and / characters. |
||||
* <b>Note: no padding is added.</b> |
||||
* @param binaryData |
||||
* binary data to encode |
||||
* @return byte[] containing Base64 characters in their UTF-8 representation. |
||||
* @since 1.4 |
||||
*/ |
||||
public static byte[] encodeBase64URLSafe(final byte[] binaryData) { |
||||
return encodeBase64(binaryData, false, true); |
||||
} |
||||
|
||||
/** |
||||
* Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The |
||||
* url-safe variation emits - and _ instead of + and / characters. |
||||
* <b>Note: no padding is added.</b> |
||||
* @param binaryData |
||||
* binary data to encode |
||||
* @return String containing Base64 characters |
||||
* @since 1.4 |
||||
*/ |
||||
public static String encodeBase64URLSafeString(final byte[] binaryData) { |
||||
return StringUtils.newStringUsAscii(encodeBase64(binaryData, false, true)); |
||||
} |
||||
|
||||
/** |
||||
* Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks |
||||
* |
||||
* @param binaryData |
||||
* binary data to encode |
||||
* @return Base64 characters chunked in 76 character blocks |
||||
*/ |
||||
public static byte[] encodeBase64Chunked(final byte[] binaryData) { |
||||
return encodeBase64(binaryData, true); |
||||
} |
||||
|
||||
/** |
||||
* Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks. |
||||
* |
||||
* @param binaryData |
||||
* Array containing binary data to encode. |
||||
* @param isChunked |
||||
* if <code>true</code> this encoder will chunk the base64 output into 76 character blocks |
||||
* @return Base64-encoded data. |
||||
* @throws IllegalArgumentException |
||||
* Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE} |
||||
*/ |
||||
public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked) { |
||||
return encodeBase64(binaryData, isChunked, false); |
||||
} |
||||
|
||||
/** |
||||
* Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks. |
||||
* |
||||
* @param binaryData |
||||
* Array containing binary data to encode. |
||||
* @param isChunked |
||||
* if <code>true</code> this encoder will chunk the base64 output into 76 character blocks |
||||
* @param urlSafe |
||||
* if <code>true</code> this encoder will emit - and _ instead of the usual + and / characters. |
||||
* <b>Note: no padding is added when encoding using the URL-safe alphabet.</b> |
||||
* @return Base64-encoded data. |
||||
* @throws IllegalArgumentException |
||||
* Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE} |
||||
* @since 1.4 |
||||
*/ |
||||
public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked, final boolean urlSafe) { |
||||
return encodeBase64(binaryData, isChunked, urlSafe, Integer.MAX_VALUE); |
||||
} |
||||
|
||||
/** |
||||
* Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks. |
||||
* |
||||
* @param binaryData |
||||
* Array containing binary data to encode. |
||||
* @param isChunked |
||||
* if <code>true</code> this encoder will chunk the base64 output into 76 character blocks |
||||
* @param urlSafe |
||||
* if <code>true</code> this encoder will emit - and _ instead of the usual + and / characters. |
||||
* <b>Note: no padding is added when encoding using the URL-safe alphabet.</b> |
||||
* @param maxResultSize |
||||
* The maximum result size to accept. |
||||
* @return Base64-encoded data. |
||||
* @throws IllegalArgumentException |
||||
* Thrown when the input array needs an output array bigger than maxResultSize |
||||
* @since 1.4 |
||||
*/ |
||||
public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked, |
||||
final boolean urlSafe, final int maxResultSize) { |
||||
if (binaryData == null || binaryData.length == 0) { |
||||
return binaryData; |
||||
} |
||||
|
||||
// Create this so can use the super-class method
|
||||
// Also ensures that the same roundings are performed by the ctor and the code
|
||||
final Base64 b64 = isChunked ? new Base64(urlSafe) : new Base64(0, CHUNK_SEPARATOR, urlSafe); |
||||
final long len = b64.getEncodedLength(binaryData); |
||||
if (len > maxResultSize) { |
||||
throw new IllegalArgumentException("Input array too big, the output array would be bigger (" + |
||||
len + |
||||
") than the specified maximum size of " + |
||||
maxResultSize); |
||||
} |
||||
|
||||
return b64.encode(binaryData); |
||||
} |
||||
|
||||
/** |
||||
* Decodes a Base64 String into octets. |
||||
* <p> |
||||
* <b>Note:</b> this method seamlessly handles data encoded in URL-safe or normal mode. |
||||
* </p> |
||||
* |
||||
* @param base64String |
||||
* String containing Base64 data |
||||
* @return Array containing decoded data. |
||||
* @since 1.4 |
||||
*/ |
||||
public static byte[] decodeBase64(final String base64String) { |
||||
return new Base64().decode(base64String); |
||||
} |
||||
|
||||
/** |
||||
* Decodes Base64 data into octets. |
||||
* <p> |
||||
* <b>Note:</b> this method seamlessly handles data encoded in URL-safe or normal mode. |
||||
* </p> |
||||
* |
||||
* @param base64Data |
||||
* Byte array containing Base64 data |
||||
* @return Array containing decoded data. |
||||
*/ |
||||
public static byte[] decodeBase64(final byte[] base64Data) { |
||||
return new Base64().decode(base64Data); |
||||
} |
||||
|
||||
// Implementation of the Encoder Interface
|
||||
|
||||
// Implementation of integer encoding used for crypto
|
||||
/** |
||||
* Decodes a byte64-encoded integer according to crypto standards such as W3C's XML-Signature. |
||||
* |
||||
* @param pArray |
||||
* a byte array containing base64 character data |
||||
* @return A BigInteger |
||||
* @since 1.4 |
||||
*/ |
||||
public static BigInteger decodeInteger(final byte[] pArray) { |
||||
return new BigInteger(1, decodeBase64(pArray)); |
||||
} |
||||
|
||||
/** |
||||
* Encodes to a byte64-encoded integer according to crypto standards such as W3C's XML-Signature. |
||||
* |
||||
* @param bigInt |
||||
* a BigInteger |
||||
* @return A byte array containing base64 character data |
||||
* @throws NullPointerException |
||||
* if null is passed in |
||||
* @since 1.4 |
||||
*/ |
||||
public static byte[] encodeInteger(final BigInteger bigInt) { |
||||
if (bigInt == null) { |
||||
throw new NullPointerException("encodeInteger called with null parameter"); |
||||
} |
||||
return encodeBase64(toIntegerBytes(bigInt), false); |
||||
} |
||||
|
||||
/** |
||||
* Returns a byte-array representation of a <code>BigInteger</code> without sign bit. |
||||
* |
||||
* @param bigInt |
||||
* <code>BigInteger</code> to be converted |
||||
* @return a byte array representation of the BigInteger parameter |
||||
*/ |
||||
static byte[] toIntegerBytes(final BigInteger bigInt) { |
||||
int bitlen = bigInt.bitLength(); |
||||
// round bitlen
|
||||
bitlen = ((bitlen + 7) >> 3) << 3; |
||||
final byte[] bigBytes = bigInt.toByteArray(); |
||||
|
||||
if (((bigInt.bitLength() % 8) != 0) && (((bigInt.bitLength() / 8) + 1) == (bitlen / 8))) { |
||||
return bigBytes; |
||||
} |
||||
// set up params for copying everything but sign bit
|
||||
int startSrc = 0; |
||||
int len = bigBytes.length; |
||||
|
||||
// if bigInt is exactly byte-aligned, just skip signbit in copy
|
||||
if ((bigInt.bitLength() % 8) == 0) { |
||||
startSrc = 1; |
||||
len--; |
||||
} |
||||
final int startDst = bitlen / 8 - len; // to pad w/ nulls as per spec
|
||||
final byte[] resizedBytes = new byte[bitlen / 8]; |
||||
System.arraycopy(bigBytes, startSrc, resizedBytes, startDst, len); |
||||
return resizedBytes; |
||||
} |
||||
|
||||
/** |
||||
* Returns whether or not the <code>octet</code> is in the Base64 alphabet. |
||||
* |
||||
* @param octet |
||||
* The value to test |
||||
* @return <code>true</code> if the value is defined in the the Base64 alphabet <code>false</code> otherwise. |
||||
*/ |
||||
@Override |
||||
protected boolean isInAlphabet(final byte octet) { |
||||
return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1; |
||||
} |
||||
|
||||
} |
@ -0,0 +1,88 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.binary; |
||||
|
||||
import java.io.InputStream; |
||||
|
||||
/** |
||||
* Provides Base64 encoding and decoding in a streaming fashion (unlimited size). When encoding the default lineLength |
||||
* is 76 characters and the default lineEnding is CRLF, but these can be overridden by using the appropriate |
||||
* constructor. |
||||
* <p> |
||||
* The default behaviour of the Base64InputStream is to DECODE, whereas the default behaviour of the Base64OutputStream |
||||
* is to ENCODE, but this behaviour can be overridden by using a different constructor. |
||||
* </p> |
||||
* <p> |
||||
* This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose |
||||
* Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein. |
||||
* </p> |
||||
* <p> |
||||
* Since this class operates directly on byte streams, and not character streams, it is hard-coded to only encode/decode |
||||
* character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8, etc). |
||||
* </p> |
||||
* |
||||
* @version $Id: Base64InputStream.java 1634429 2014-10-27 01:08:36Z ggregory $ |
||||
* @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a> |
||||
* @since 1.4 |
||||
*/ |
||||
public class Base64InputStream extends BaseNCodecInputStream { |
||||
|
||||
/** |
||||
* Creates a Base64InputStream such that all data read is Base64-decoded from the original provided InputStream. |
||||
* |
||||
* @param in |
||||
* InputStream to wrap. |
||||
*/ |
||||
public Base64InputStream(final InputStream in) { |
||||
this(in, false); |
||||
} |
||||
|
||||
/** |
||||
* Creates a Base64InputStream such that all data read is either Base64-encoded or Base64-decoded from the original |
||||
* provided InputStream. |
||||
* |
||||
* @param in |
||||
* InputStream to wrap. |
||||
* @param doEncode |
||||
* true if we should encode all data read from us, false if we should decode. |
||||
*/ |
||||
public Base64InputStream(final InputStream in, final boolean doEncode) { |
||||
super(in, new Base64(false), doEncode); |
||||
} |
||||
|
||||
/** |
||||
* Creates a Base64InputStream such that all data read is either Base64-encoded or Base64-decoded from the original |
||||
* provided InputStream. |
||||
* |
||||
* @param in |
||||
* InputStream to wrap. |
||||
* @param doEncode |
||||
* true if we should encode all data read from us, false if we should decode. |
||||
* @param lineLength |
||||
* If doEncode is true, each line of encoded data will contain lineLength characters (rounded down to |
||||
* nearest multiple of 4). If lineLength <= 0, the encoded data is not divided into lines. If doEncode |
||||
* is false, lineLength is ignored. |
||||
* @param lineSeparator |
||||
* If doEncode is true, each line of encoded data will be terminated with this byte sequence (e.g. \r\n). |
||||
* If lineLength <= 0, the lineSeparator is not used. If doEncode is false lineSeparator is ignored. |
||||
*/ |
||||
public Base64InputStream(final InputStream in, final boolean doEncode, |
||||
final int lineLength, final byte[] lineSeparator) { |
||||
super(in, new Base64(lineLength, lineSeparator), doEncode); |
||||
} |
||||
} |
@ -0,0 +1,92 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.binary; |
||||
|
||||
import java.io.OutputStream; |
||||
|
||||
/** |
||||
* Provides Base64 encoding and decoding in a streaming fashion (unlimited size). When encoding the default lineLength |
||||
* is 76 characters and the default lineEnding is CRLF, but these can be overridden by using the appropriate |
||||
* constructor. |
||||
* <p> |
||||
* The default behaviour of the Base64OutputStream is to ENCODE, whereas the default behaviour of the Base64InputStream |
||||
* is to DECODE. But this behaviour can be overridden by using a different constructor. |
||||
* </p> |
||||
* <p> |
||||
* This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose |
||||
* Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein. |
||||
* </p> |
||||
* <p> |
||||
* Since this class operates directly on byte streams, and not character streams, it is hard-coded to only encode/decode |
||||
* character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8, etc). |
||||
* </p> |
||||
* <p> |
||||
* <b>Note:</b> It is mandatory to close the stream after the last byte has been written to it, otherwise the |
||||
* final padding will be omitted and the resulting data will be incomplete/inconsistent. |
||||
* </p> |
||||
* |
||||
* @version $Id: Base64OutputStream.java 1635952 2014-11-01 14:19:04Z tn $ |
||||
* @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a> |
||||
* @since 1.4 |
||||
*/ |
||||
public class Base64OutputStream extends BaseNCodecOutputStream { |
||||
|
||||
/** |
||||
* Creates a Base64OutputStream such that all data written is Base64-encoded to the original provided OutputStream. |
||||
* |
||||
* @param out |
||||
* OutputStream to wrap. |
||||
*/ |
||||
public Base64OutputStream(final OutputStream out) { |
||||
this(out, true); |
||||
} |
||||
|
||||
/** |
||||
* Creates a Base64OutputStream such that all data written is either Base64-encoded or Base64-decoded to the |
||||
* original provided OutputStream. |
||||
* |
||||
* @param out |
||||
* OutputStream to wrap. |
||||
* @param doEncode |
||||
* true if we should encode all data written to us, false if we should decode. |
||||
*/ |
||||
public Base64OutputStream(final OutputStream out, final boolean doEncode) { |
||||
super(out,new Base64(false), doEncode); |
||||
} |
||||
|
||||
/** |
||||
* Creates a Base64OutputStream such that all data written is either Base64-encoded or Base64-decoded to the |
||||
* original provided OutputStream. |
||||
* |
||||
* @param out |
||||
* OutputStream to wrap. |
||||
* @param doEncode |
||||
* true if we should encode all data written to us, false if we should decode. |
||||
* @param lineLength |
||||
* If doEncode is true, each line of encoded data will contain lineLength characters (rounded down to |
||||
* nearest multiple of 4). If lineLength <= 0, the encoded data is not divided into lines. If doEncode |
||||
* is false, lineLength is ignored. |
||||
* @param lineSeparator |
||||
* If doEncode is true, each line of encoded data will be terminated with this byte sequence (e.g. \r\n). |
||||
* If lineLength <= 0, the lineSeparator is not used. If doEncode is false lineSeparator is ignored. |
||||
*/ |
||||
public Base64OutputStream(final OutputStream out, final boolean doEncode, |
||||
final int lineLength, final byte[] lineSeparator) { |
||||
super(out, new Base64(lineLength, lineSeparator), doEncode); |
||||
} |
||||
} |
@ -0,0 +1,547 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.binary; |
||||
|
||||
import java.util.Arrays; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.BinaryDecoder; |
||||
import com.fr.third.org.apache.commons.codec.BinaryEncoder; |
||||
import com.fr.third.org.apache.commons.codec.DecoderException; |
||||
import com.fr.third.org.apache.commons.codec.EncoderException; |
||||
|
||||
/** |
||||
* Abstract superclass for Base-N encoders and decoders. |
||||
* |
||||
* <p> |
||||
* This class is thread-safe. |
||||
* </p> |
||||
* |
||||
* @version $Id: BaseNCodec.java 1811344 2017-10-06 15:19:57Z ggregory $ |
||||
*/ |
||||
public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder { |
||||
|
||||
/** |
||||
* Holds thread context so classes can be thread-safe. |
||||
* |
||||
* This class is not itself thread-safe; each thread must allocate its own copy. |
||||
* |
||||
* @since 1.7 |
||||
*/ |
||||
static class Context { |
||||
|
||||
/** |
||||
* Place holder for the bytes we're dealing with for our based logic. |
||||
* Bitwise operations store and extract the encoding or decoding from this variable. |
||||
*/ |
||||
int ibitWorkArea; |
||||
|
||||
/** |
||||
* Place holder for the bytes we're dealing with for our based logic. |
||||
* Bitwise operations store and extract the encoding or decoding from this variable. |
||||
*/ |
||||
long lbitWorkArea; |
||||
|
||||
/** |
||||
* Buffer for streaming. |
||||
*/ |
||||
byte[] buffer; |
||||
|
||||
/** |
||||
* Position where next character should be written in the buffer. |
||||
*/ |
||||
int pos; |
||||
|
||||
/** |
||||
* Position where next character should be read from the buffer. |
||||
*/ |
||||
int readPos; |
||||
|
||||
/** |
||||
* Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless, |
||||
* and must be thrown away. |
||||
*/ |
||||
boolean eof; |
||||
|
||||
/** |
||||
* Variable tracks how many characters have been written to the current line. Only used when encoding. We use |
||||
* it to make sure each encoded line never goes beyond lineLength (if lineLength > 0). |
||||
*/ |
||||
int currentLinePos; |
||||
|
||||
/** |
||||
* Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding. This |
||||
* variable helps track that. |
||||
*/ |
||||
int modulus; |
||||
|
||||
Context() { |
||||
} |
||||
|
||||
/** |
||||
* Returns a String useful for debugging (especially within a debugger.) |
||||
* |
||||
* @return a String useful for debugging. |
||||
*/ |
||||
@SuppressWarnings("boxing") // OK to ignore boxing here
|
||||
@Override |
||||
public String toString() { |
||||
return String.format("%s[buffer=%s, currentLinePos=%s, eof=%s, ibitWorkArea=%s, lbitWorkArea=%s, " + |
||||
"modulus=%s, pos=%s, readPos=%s]", this.getClass().getSimpleName(), Arrays.toString(buffer), |
||||
currentLinePos, eof, ibitWorkArea, lbitWorkArea, modulus, pos, readPos); |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* EOF |
||||
* |
||||
* @since 1.7 |
||||
*/ |
||||
static final int EOF = -1; |
||||
|
||||
/** |
||||
* MIME chunk size per RFC 2045 section 6.8. |
||||
* |
||||
* <p> |
||||
* The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any |
||||
* equal signs. |
||||
* </p> |
||||
* |
||||
* @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a> |
||||
*/ |
||||
public static final int MIME_CHUNK_SIZE = 76; |
||||
|
||||
/** |
||||
* PEM chunk size per RFC 1421 section 4.3.2.4. |
||||
* |
||||
* <p> |
||||
* The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any |
||||
* equal signs. |
||||
* </p> |
||||
* |
||||
* @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section 4.3.2.4</a> |
||||
*/ |
||||
public static final int PEM_CHUNK_SIZE = 64; |
||||
|
||||
private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2; |
||||
|
||||
/** |
||||
* Defines the default buffer size - currently {@value} |
||||
* - must be large enough for at least one encoded block+separator |
||||
*/ |
||||
private static final int DEFAULT_BUFFER_SIZE = 8192; |
||||
|
||||
/** Mask used to extract 8 bits, used in decoding bytes */ |
||||
protected static final int MASK_8BITS = 0xff; |
||||
|
||||
/** |
||||
* Byte used to pad output. |
||||
*/ |
||||
protected static final byte PAD_DEFAULT = '='; // Allow static access to default
|
||||
|
||||
/** |
||||
* @deprecated Use {@link #pad}. Will be removed in 2.0. |
||||
*/ |
||||
@Deprecated |
||||
protected final byte PAD = PAD_DEFAULT; // instance variable just in case it needs to vary later
|
||||
|
||||
protected final byte pad; // instance variable just in case it needs to vary later
|
||||
|
||||
/** Number of bytes in each full block of unencoded data, e.g. 3 for Base64 and 5 for Base32 */ |
||||
private final int unencodedBlockSize; |
||||
|
||||
/** Number of bytes in each full block of encoded data, e.g. 4 for Base64 and 8 for Base32 */ |
||||
private final int encodedBlockSize; |
||||
|
||||
/** |
||||
* Chunksize for encoding. Not used when decoding. |
||||
* A value of zero or less implies no chunking of the encoded data. |
||||
* Rounded down to nearest multiple of encodedBlockSize. |
||||
*/ |
||||
protected final int lineLength; |
||||
|
||||
/** |
||||
* Size of chunk separator. Not used unless {@link #lineLength} > 0. |
||||
*/ |
||||
private final int chunkSeparatorLength; |
||||
|
||||
/**
 * Creates a codec that pads with {@link #PAD_DEFAULT}.
 * <p>
 * Note {@code lineLength} is rounded down to the nearest multiple of {@link #encodedBlockSize}.
 * If {@code chunkSeparatorLength} is zero, then chunking is disabled.
 * </p>
 *
 * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
 * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
 * @param lineLength if &gt; 0, use chunking with a length {@code lineLength}
 * @param chunkSeparatorLength the chunk separator length, if relevant
 */
protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
                     final int lineLength, final int chunkSeparatorLength) {
    // Delegate to the full constructor, supplying the default pad byte.
    this(unencodedBlockSize, encodedBlockSize, lineLength, chunkSeparatorLength, PAD_DEFAULT);
}
||||
|
||||
/**
 * Creates a codec with an explicit padding byte.
 * <p>
 * Note {@code lineLength} is rounded down to the nearest multiple of {@link #encodedBlockSize}.
 * If {@code chunkSeparatorLength} is zero, then chunking is disabled.
 * </p>
 *
 * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
 * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
 * @param lineLength if &gt; 0, use chunking with a length {@code lineLength}
 * @param chunkSeparatorLength the chunk separator length, if relevant
 * @param pad byte used as padding byte.
 */
protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
                     final int lineLength, final int chunkSeparatorLength, final byte pad) {
    this.pad = pad;
    this.unencodedBlockSize = unencodedBlockSize;
    this.encodedBlockSize = encodedBlockSize;
    this.chunkSeparatorLength = chunkSeparatorLength;

    // Chunking needs both a positive line length and a non-empty separator.
    if (lineLength > 0 && chunkSeparatorLength > 0) {
        // Round the line length down to a whole number of encoded blocks.
        final int blocksPerLine = lineLength / encodedBlockSize;
        this.lineLength = blocksPerLine * encodedBlockSize;
    } else {
        this.lineLength = 0;
    }
}
||||
|
||||
/** |
||||
* Returns true if this object has buffered data for reading. |
||||
* |
||||
* @param context the context to be used |
||||
* @return true if there is data still available for reading. |
||||
*/ |
||||
boolean hasData(final Context context) { // package protected for access from I/O streams
|
||||
return context.buffer != null; |
||||
} |
||||
|
||||
/** |
||||
* Returns the amount of buffered data available for reading. |
||||
* |
||||
* @param context the context to be used |
||||
* @return The amount of buffered data available for reading. |
||||
*/ |
||||
int available(final Context context) { // package protected for access from I/O streams
|
||||
return context.buffer != null ? context.pos - context.readPos : 0; |
||||
} |
||||
|
||||
/** |
||||
* Get the default buffer size. Can be overridden. |
||||
* |
||||
* @return {@link #DEFAULT_BUFFER_SIZE} |
||||
*/ |
||||
protected int getDefaultBufferSize() { |
||||
return DEFAULT_BUFFER_SIZE; |
||||
} |
||||
|
||||
/** |
||||
* Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}. |
||||
* @param context the context to be used |
||||
*/ |
||||
private byte[] resizeBuffer(final Context context) { |
||||
if (context.buffer == null) { |
||||
context.buffer = new byte[getDefaultBufferSize()]; |
||||
context.pos = 0; |
||||
context.readPos = 0; |
||||
} else { |
||||
final byte[] b = new byte[context.buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR]; |
||||
System.arraycopy(context.buffer, 0, b, 0, context.buffer.length); |
||||
context.buffer = b; |
||||
} |
||||
return context.buffer; |
||||
} |
||||
|
||||
/** |
||||
* Ensure that the buffer has room for <code>size</code> bytes |
||||
* |
||||
* @param size minimum spare space required |
||||
* @param context the context to be used |
||||
* @return the buffer |
||||
*/ |
||||
protected byte[] ensureBufferSize(final int size, final Context context){ |
||||
if ((context.buffer == null) || (context.buffer.length < context.pos + size)){ |
||||
return resizeBuffer(context); |
||||
} |
||||
return context.buffer; |
||||
} |
||||
|
||||
/** |
||||
* Extracts buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail |
||||
* bytes. Returns how many bytes were actually extracted. |
||||
* <p> |
||||
* Package protected for access from I/O streams. |
||||
* |
||||
* @param b |
||||
* byte[] array to extract the buffered data into. |
||||
* @param bPos |
||||
* position in byte[] array to start extraction at. |
||||
* @param bAvail |
||||
* amount of bytes we're allowed to extract. We may extract fewer (if fewer are available). |
||||
* @param context |
||||
* the context to be used |
||||
* @return The number of bytes successfully extracted into the provided byte[] array. |
||||
*/ |
||||
int readResults(final byte[] b, final int bPos, final int bAvail, final Context context) { |
||||
if (context.buffer != null) { |
||||
final int len = Math.min(available(context), bAvail); |
||||
System.arraycopy(context.buffer, context.readPos, b, bPos, len); |
||||
context.readPos += len; |
||||
if (context.readPos >= context.pos) { |
||||
context.buffer = null; // so hasData() will return false, and this method can return -1
|
||||
} |
||||
return len; |
||||
} |
||||
return context.eof ? EOF : 0; |
||||
} |
||||
|
||||
/** |
||||
* Checks if a byte value is whitespace or not. |
||||
* Whitespace is taken to mean: space, tab, CR, LF |
||||
* @param byteToCheck |
||||
* the byte to check |
||||
* @return true if byte is whitespace, false otherwise |
||||
*/ |
||||
protected static boolean isWhiteSpace(final byte byteToCheck) { |
||||
switch (byteToCheck) { |
||||
case ' ' : |
||||
case '\n' : |
||||
case '\r' : |
||||
case '\t' : |
||||
return true; |
||||
default : |
||||
return false; |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Encodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of |
||||
* the Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[]. |
||||
* |
||||
* @param obj |
||||
* Object to encode |
||||
* @return An object (of type byte[]) containing the Base-N encoded data which corresponds to the byte[] supplied. |
||||
* @throws EncoderException |
||||
* if the parameter supplied is not of type byte[] |
||||
*/ |
||||
@Override |
||||
public Object encode(final Object obj) throws EncoderException { |
||||
if (!(obj instanceof byte[])) { |
||||
throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]"); |
||||
} |
||||
return encode((byte[]) obj); |
||||
} |
||||
|
||||
/** |
||||
* Encodes a byte[] containing binary data, into a String containing characters in the Base-N alphabet. |
||||
* Uses UTF8 encoding. |
||||
* |
||||
* @param pArray |
||||
* a byte array containing binary data |
||||
* @return A String containing only Base-N character data |
||||
*/ |
||||
public String encodeToString(final byte[] pArray) { |
||||
return StringUtils.newStringUtf8(encode(pArray)); |
||||
} |
||||
|
||||
/** |
||||
* Encodes a byte[] containing binary data, into a String containing characters in the appropriate alphabet. |
||||
* Uses UTF8 encoding. |
||||
* |
||||
* @param pArray a byte array containing binary data |
||||
* @return String containing only character data in the appropriate alphabet. |
||||
* @since 1.5 |
||||
* This is a duplicate of {@link #encodeToString(byte[])}; it was merged during refactoring. |
||||
*/ |
||||
public String encodeAsString(final byte[] pArray){ |
||||
return StringUtils.newStringUtf8(encode(pArray)); |
||||
} |
||||
|
||||
/** |
||||
* Decodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of |
||||
* the Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String. |
||||
* |
||||
* @param obj |
||||
* Object to decode |
||||
* @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String |
||||
* supplied. |
||||
* @throws DecoderException |
||||
* if the parameter supplied is not of type byte[] |
||||
*/ |
||||
@Override |
||||
public Object decode(final Object obj) throws DecoderException { |
||||
if (obj instanceof byte[]) { |
||||
return decode((byte[]) obj); |
||||
} else if (obj instanceof String) { |
||||
return decode((String) obj); |
||||
} else { |
||||
throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String"); |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Decodes a String containing characters in the Base-N alphabet. |
||||
* |
||||
* @param pArray |
||||
* A String containing Base-N character data |
||||
* @return a byte array containing binary data |
||||
*/ |
||||
public byte[] decode(final String pArray) { |
||||
return decode(StringUtils.getBytesUtf8(pArray)); |
||||
} |
||||
|
||||
/** |
||||
* Decodes a byte[] containing characters in the Base-N alphabet. |
||||
* |
||||
* @param pArray |
||||
* A byte array containing Base-N character data |
||||
* @return a byte array containing binary data |
||||
*/ |
||||
@Override |
||||
public byte[] decode(final byte[] pArray) { |
||||
if (pArray == null || pArray.length == 0) { |
||||
return pArray; |
||||
} |
||||
final Context context = new Context(); |
||||
decode(pArray, 0, pArray.length, context); |
||||
decode(pArray, 0, EOF, context); // Notify decoder of EOF.
|
||||
final byte[] result = new byte[context.pos]; |
||||
readResults(result, 0, result.length, context); |
||||
return result; |
||||
} |
||||
|
||||
/** |
||||
* Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet. |
||||
* |
||||
* @param pArray |
||||
* a byte array containing binary data |
||||
* @return A byte array containing only the base N alphabetic character data |
||||
*/ |
||||
@Override |
||||
public byte[] encode(final byte[] pArray) { |
||||
if (pArray == null || pArray.length == 0) { |
||||
return pArray; |
||||
} |
||||
return encode(pArray, 0, pArray.length); |
||||
} |
||||
|
||||
/** |
||||
* Encodes a byte[] containing binary data, into a byte[] containing |
||||
* characters in the alphabet. |
||||
* |
||||
* @param pArray |
||||
* a byte array containing binary data |
||||
* @param offset |
||||
* initial offset of the subarray. |
||||
* @param length |
||||
* length of the subarray. |
||||
* @return A byte array containing only the base N alphabetic character data |
||||
* @since 1.11 |
||||
*/ |
||||
public byte[] encode(final byte[] pArray, final int offset, final int length) { |
||||
if (pArray == null || pArray.length == 0) { |
||||
return pArray; |
||||
} |
||||
final Context context = new Context(); |
||||
encode(pArray, offset, length, context); |
||||
encode(pArray, offset, EOF, context); // Notify encoder of EOF.
|
||||
final byte[] buf = new byte[context.pos - context.readPos]; |
||||
readResults(buf, 0, buf.length, context); |
||||
return buf; |
||||
} |
||||
|
||||
// package protected for access from I/O streams
|
||||
abstract void encode(byte[] pArray, int i, int length, Context context); |
||||
|
||||
// package protected for access from I/O streams
|
||||
/**
 * Feeds a slice of input to the decoder; results are later fetched from the same context
 * via <code>readResults</code>.
 * <p>
 * Callers in this file invoke it first with the real length of the data and then once more with
 * <code>EOF</code> as the length to notify the decoder that no further input follows.
 * </p>
 *
 * @param pArray
 *            the array holding the bytes to decode
 * @param i
 *            the offset into <code>pArray</code> at which to start
 * @param length
 *            the number of bytes to process, or <code>EOF</code> to signal end of input
 * @param context
 *            the state object shared between successive calls of one decoding operation
 */
abstract void decode(byte[] pArray, int i, int length, Context context); |
||||
|
||||
/**
 * Returns whether or not the given byte <code>value</code> is in the current alphabet.
 * Does not allow whitespace or pad.
 *
 * @param value
 *            the byte to test
 *
 * @return <code>true</code> if the value is defined in the current alphabet, <code>false</code> otherwise.
 */ |
||||
protected abstract boolean isInAlphabet(byte value); |
||||
|
||||
/** |
||||
* Tests a given byte array to see if it contains only valid characters within the alphabet. |
||||
* The method optionally treats whitespace and pad as valid. |
||||
* |
||||
* @param arrayOctet byte array to test |
||||
* @param allowWSPad if <code>true</code>, then whitespace and PAD are also allowed |
||||
* |
||||
* @return <code>true</code> if all bytes are valid characters in the alphabet or if the byte array is empty; |
||||
* <code>false</code>, otherwise |
||||
*/ |
||||
public boolean isInAlphabet(final byte[] arrayOctet, final boolean allowWSPad) { |
||||
for (final byte octet : arrayOctet) { |
||||
if (!isInAlphabet(octet) && |
||||
(!allowWSPad || (octet != pad) && !isWhiteSpace(octet))) { |
||||
return false; |
||||
} |
||||
} |
||||
return true; |
||||
} |
||||
|
||||
/** |
||||
* Tests a given String to see if it contains only valid characters within the alphabet. |
||||
* The method treats whitespace and PAD as valid. |
||||
* |
||||
* @param basen String to test |
||||
* @return <code>true</code> if all characters in the String are valid characters in the alphabet or if |
||||
* the String is empty; <code>false</code>, otherwise |
||||
* @see #isInAlphabet(byte[], boolean) |
||||
*/ |
||||
public boolean isInAlphabet(final String basen) { |
||||
return isInAlphabet(StringUtils.getBytesUtf8(basen), true); |
||||
} |
||||
|
||||
/** |
||||
* Tests a given byte array to see if it contains any characters within the alphabet or PAD. |
||||
* |
||||
* Intended for use in checking line-ending arrays |
||||
* |
||||
* @param arrayOctet |
||||
* byte array to test |
||||
* @return <code>true</code> if any byte is a valid character in the alphabet or PAD; <code>false</code> otherwise |
||||
*/ |
||||
protected boolean containsAlphabetOrPad(final byte[] arrayOctet) { |
||||
if (arrayOctet == null) { |
||||
return false; |
||||
} |
||||
for (final byte element : arrayOctet) { |
||||
if (pad == element || isInAlphabet(element)) { |
||||
return true; |
||||
} |
||||
} |
||||
return false; |
||||
} |
||||
|
||||
/** |
||||
* Calculates the amount of space needed to encode the supplied array. |
||||
* |
||||
* @param pArray byte[] array which will later be encoded |
||||
* |
||||
* @return amount of space needed to encoded the supplied array. |
||||
* Returns a long since a max-len array will require > Integer.MAX_VALUE |
||||
*/ |
||||
public long getEncodedLength(final byte[] pArray) { |
||||
// Calculate non-chunked size - rounded up to allow for padding
|
||||
// cast to long is needed to avoid possibility of overflow
|
||||
long len = ((pArray.length + unencodedBlockSize-1) / unencodedBlockSize) * (long) encodedBlockSize; |
||||
if (lineLength > 0) { // We're using chunking
|
||||
// Round up to nearest multiple
|
||||
len += ((len + lineLength-1) / lineLength) * chunkSeparatorLength; |
||||
} |
||||
return len; |
||||
} |
||||
} |
@ -0,0 +1,211 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.binary; |
||||
|
||||
import static com.fr.third.org.apache.commons.codec.binary.BaseNCodec.EOF; |
||||
|
||||
import java.io.FilterInputStream; |
||||
import java.io.IOException; |
||||
import java.io.InputStream; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.binary.BaseNCodec.Context; |
||||
|
||||
/** |
||||
* Abstract superclass for Base-N input streams. |
||||
* |
||||
* @since 1.5 |
||||
* @version $Id: BaseNCodecInputStream.java 1429868 2013-01-07 16:08:05Z ggregory $ |
||||
*/ |
||||
public class BaseNCodecInputStream extends FilterInputStream { |
||||
|
||||
private final BaseNCodec baseNCodec; |
||||
|
||||
private final boolean doEncode; |
||||
|
||||
private final byte[] singleByte = new byte[1]; |
||||
|
||||
private final Context context = new Context(); |
||||
|
||||
protected BaseNCodecInputStream(final InputStream in, final BaseNCodec baseNCodec, final boolean doEncode) { |
||||
super(in); |
||||
this.doEncode = doEncode; |
||||
this.baseNCodec = baseNCodec; |
||||
} |
||||
|
||||
/** |
||||
* {@inheritDoc} |
||||
* |
||||
* @return <code>0</code> if the {@link InputStream} has reached <code>EOF</code>, |
||||
* <code>1</code> otherwise |
||||
* @since 1.7 |
||||
*/ |
||||
@Override |
||||
public int available() throws IOException { |
||||
// Note: the logic is similar to the InflaterInputStream:
|
||||
// as long as we have not reached EOF, indicate that there is more
|
||||
// data available. As we do not know for sure how much data is left,
|
||||
// just return 1 as a safe guess.
|
||||
|
||||
return context.eof ? 0 : 1; |
||||
} |
||||
|
||||
/** |
||||
* Marks the current position in this input stream. |
||||
* <p>The {@link #mark} method of {@link BaseNCodecInputStream} does nothing.</p> |
||||
* |
||||
* @param readLimit the maximum limit of bytes that can be read before the mark position becomes invalid. |
||||
* @since 1.7 |
||||
*/ |
||||
@Override |
||||
public synchronized void mark(final int readLimit) { |
||||
} |
||||
|
||||
/** |
||||
* {@inheritDoc} |
||||
* |
||||
* @return always returns <code>false</code> |
||||
*/ |
||||
@Override |
||||
public boolean markSupported() { |
||||
return false; // not an easy job to support marks
|
||||
} |
||||
|
||||
/** |
||||
* Reads one <code>byte</code> from this input stream. |
||||
* |
||||
* @return the byte as an integer in the range 0 to 255. Returns -1 if EOF has been reached. |
||||
* @throws IOException |
||||
* if an I/O error occurs. |
||||
*/ |
||||
@Override |
||||
public int read() throws IOException { |
||||
int r = read(singleByte, 0, 1); |
||||
while (r == 0) { |
||||
r = read(singleByte, 0, 1); |
||||
} |
||||
if (r > 0) { |
||||
final byte b = singleByte[0]; |
||||
return b < 0 ? 256 + b : b; |
||||
} |
||||
return EOF; |
||||
} |
||||
|
||||
/** |
||||
* Attempts to read <code>len</code> bytes into the specified <code>b</code> array starting at <code>offset</code> |
||||
* from this InputStream. |
||||
* |
||||
* @param b |
||||
* destination byte array |
||||
* @param offset |
||||
* where to start writing the bytes |
||||
* @param len |
||||
* maximum number of bytes to read |
||||
* |
||||
* @return number of bytes read |
||||
* @throws IOException |
||||
* if an I/O error occurs. |
||||
* @throws NullPointerException |
||||
* if the byte array parameter is null |
||||
* @throws IndexOutOfBoundsException |
||||
* if offset, len or buffer size are invalid |
||||
*/ |
||||
@Override |
||||
public int read(final byte b[], final int offset, final int len) throws IOException { |
||||
if (b == null) { |
||||
throw new NullPointerException(); |
||||
} else if (offset < 0 || len < 0) { |
||||
throw new IndexOutOfBoundsException(); |
||||
} else if (offset > b.length || offset + len > b.length) { |
||||
throw new IndexOutOfBoundsException(); |
||||
} else if (len == 0) { |
||||
return 0; |
||||
} else { |
||||
int readLen = 0; |
||||
/* |
||||
Rationale for while-loop on (readLen == 0): |
||||
----- |
||||
Base32.readResults() usually returns > 0 or EOF (-1). In the |
||||
rare case where it returns 0, we just keep trying. |
||||
|
||||
This is essentially an undocumented contract for InputStream |
||||
implementors that want their code to work properly with |
||||
java.io.InputStreamReader, since the latter hates it when |
||||
InputStream.read(byte[]) returns a zero. Unfortunately our |
||||
readResults() call must return 0 if a large amount of the data |
||||
being decoded was non-base32, so this while-loop enables proper |
||||
interop with InputStreamReader for that scenario. |
||||
----- |
||||
This is a fix for CODEC-101 |
||||
*/ |
||||
while (readLen == 0) { |
||||
if (!baseNCodec.hasData(context)) { |
||||
final byte[] buf = new byte[doEncode ? 4096 : 8192]; |
||||
final int c = in.read(buf); |
||||
if (doEncode) { |
||||
baseNCodec.encode(buf, 0, c, context); |
||||
} else { |
||||
baseNCodec.decode(buf, 0, c, context); |
||||
} |
||||
} |
||||
readLen = baseNCodec.readResults(b, offset, len, context); |
||||
} |
||||
return readLen; |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Repositions this stream to the position at the time the mark method was last called on this input stream. |
||||
* <p> |
||||
* The {@link #reset} method of {@link BaseNCodecInputStream} does nothing except throw an {@link IOException}. |
||||
* |
||||
* @throws IOException if this method is invoked |
||||
* @since 1.7 |
||||
*/ |
||||
@Override |
||||
public synchronized void reset() throws IOException { |
||||
throw new IOException("mark/reset not supported"); |
||||
} |
||||
|
||||
/** |
||||
* {@inheritDoc} |
||||
* |
||||
* @throws IllegalArgumentException if the provided skip length is negative |
||||
* @since 1.7 |
||||
*/ |
||||
@Override |
||||
public long skip(final long n) throws IOException { |
||||
if (n < 0) { |
||||
throw new IllegalArgumentException("Negative skip length: " + n); |
||||
} |
||||
|
||||
// skip in chunks of 512 bytes
|
||||
final byte[] b = new byte[512]; |
||||
long todo = n; |
||||
|
||||
while (todo > 0) { |
||||
int len = (int) Math.min(b.length, todo); |
||||
len = this.read(b, 0, len); |
||||
if (len == EOF) { |
||||
break; |
||||
} |
||||
todo -= len; |
||||
} |
||||
|
||||
return n - todo; |
||||
} |
||||
} |
@ -0,0 +1,176 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.binary; |
||||
|
||||
import static com.fr.third.org.apache.commons.codec.binary.BaseNCodec.EOF; |
||||
|
||||
import java.io.FilterOutputStream; |
||||
import java.io.IOException; |
||||
import java.io.OutputStream; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.binary.BaseNCodec.Context; |
||||
|
||||
/** |
||||
* Abstract superclass for Base-N output streams. |
||||
* <p> |
||||
* To write the EOF marker without closing the stream, call {@link #eof()} or use an <a |
||||
* href="https://commons.apache.org/proper/commons-io/">Apache Commons IO</a> <a href= |
||||
* "https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/output/CloseShieldOutputStream.html" |
||||
* >CloseShieldOutputStream</a>. |
||||
* </p> |
||||
* |
||||
* @since 1.5 |
||||
* @version $Id: BaseNCodecOutputStream.java 1744727 2016-05-20 12:43:52Z sebb $ |
||||
*/ |
||||
public class BaseNCodecOutputStream extends FilterOutputStream { |
||||
|
||||
private final boolean doEncode; |
||||
|
||||
private final BaseNCodec baseNCodec; |
||||
|
||||
private final byte[] singleByte = new byte[1]; |
||||
|
||||
private final Context context = new Context(); |
||||
|
||||
// TODO should this be protected?
|
||||
public BaseNCodecOutputStream(final OutputStream out, final BaseNCodec basedCodec, final boolean doEncode) { |
||||
super(out); |
||||
this.baseNCodec = basedCodec; |
||||
this.doEncode = doEncode; |
||||
} |
||||
|
||||
/** |
||||
* Writes the specified <code>byte</code> to this output stream. |
||||
* |
||||
* @param i |
||||
* source byte |
||||
* @throws IOException |
||||
* if an I/O error occurs. |
||||
*/ |
||||
@Override |
||||
public void write(final int i) throws IOException { |
||||
singleByte[0] = (byte) i; |
||||
write(singleByte, 0, 1); |
||||
} |
||||
|
||||
/** |
||||
* Writes <code>len</code> bytes from the specified <code>b</code> array starting at <code>offset</code> to this |
||||
* output stream. |
||||
* |
||||
* @param b |
||||
* source byte array |
||||
* @param offset |
||||
* where to start reading the bytes |
||||
* @param len |
||||
* maximum number of bytes to write |
||||
* |
||||
* @throws IOException |
||||
* if an I/O error occurs. |
||||
* @throws NullPointerException |
||||
* if the byte array parameter is null |
||||
* @throws IndexOutOfBoundsException |
||||
* if offset, len or buffer size are invalid |
||||
*/ |
||||
@Override |
||||
public void write(final byte b[], final int offset, final int len) throws IOException { |
||||
if (b == null) { |
||||
throw new NullPointerException(); |
||||
} else if (offset < 0 || len < 0) { |
||||
throw new IndexOutOfBoundsException(); |
||||
} else if (offset > b.length || offset + len > b.length) { |
||||
throw new IndexOutOfBoundsException(); |
||||
} else if (len > 0) { |
||||
if (doEncode) { |
||||
baseNCodec.encode(b, offset, len, context); |
||||
} else { |
||||
baseNCodec.decode(b, offset, len, context); |
||||
} |
||||
flush(false); |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Flushes this output stream and forces any buffered output bytes to be written out to the stream. If propagate is |
||||
* true, the wrapped stream will also be flushed. |
||||
* |
||||
* @param propagate |
||||
* boolean flag to indicate whether the wrapped OutputStream should also be flushed. |
||||
* @throws IOException |
||||
* if an I/O error occurs. |
||||
*/ |
||||
private void flush(final boolean propagate) throws IOException { |
||||
final int avail = baseNCodec.available(context); |
||||
if (avail > 0) { |
||||
final byte[] buf = new byte[avail]; |
||||
final int c = baseNCodec.readResults(buf, 0, avail, context); |
||||
if (c > 0) { |
||||
out.write(buf, 0, c); |
||||
} |
||||
} |
||||
if (propagate) { |
||||
out.flush(); |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Flushes this output stream and forces any buffered output bytes to be written out to the stream. |
||||
* |
||||
* @throws IOException |
||||
* if an I/O error occurs. |
||||
*/ |
||||
@Override |
||||
public void flush() throws IOException { |
||||
flush(true); |
||||
} |
||||
|
||||
/** |
||||
* Closes this output stream and releases any system resources associated with the stream. |
||||
* <p> |
||||
* To write the EOF marker without closing the stream, call {@link #eof()} or use an |
||||
* <a href="https://commons.apache.org/proper/commons-io/">Apache Commons IO</a> <a href= |
||||
* "https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/output/CloseShieldOutputStream.html" |
||||
* >CloseShieldOutputStream</a>. |
||||
* </p> |
||||
* |
||||
* @throws IOException |
||||
* if an I/O error occurs. |
||||
*/ |
||||
@Override |
||||
public void close() throws IOException { |
||||
eof(); |
||||
flush(); |
||||
out.close(); |
||||
} |
||||
|
||||
/** |
||||
* Writes EOF. |
||||
* |
||||
* @throws IOException |
||||
* if an I/O error occurs. |
||||
* @since 1.11 |
||||
*/ |
||||
public void eof() throws IOException { |
||||
// Notify encoder of EOF (-1).
|
||||
if (doEncode) { |
||||
baseNCodec.encode(singleByte, 0, EOF, context); |
||||
} else { |
||||
baseNCodec.decode(singleByte, 0, EOF, context); |
||||
} |
||||
} |
||||
|
||||
} |
@ -0,0 +1,303 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.binary; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.Decoder; |
||||
import com.fr.third.org.apache.commons.codec.BinaryDecoder; |
||||
import com.fr.third.org.apache.commons.codec.Encoder; |
||||
import com.fr.third.org.apache.commons.codec.BinaryEncoder; |
||||
import com.fr.third.org.apache.commons.codec.DecoderException; |
||||
import com.fr.third.org.apache.commons.codec.EncoderException; |
||||
|
||||
/** |
||||
* Converts between byte arrays and strings of "0"s and "1"s. |
||||
* |
||||
* <p>This class is immutable and thread-safe.</p> |
||||
* |
||||
* TODO: may want to add more bit vector functions like and/or/xor/nand |
||||
* TODO: also might be good to generate boolean[] from byte[] et cetera. |
||||
* |
||||
* @since 1.3 |
||||
* @version $Id: BinaryCodec.java 1619948 2014-08-22 22:53:55Z ggregory $ |
||||
*/ |
||||
public class BinaryCodec implements BinaryDecoder, BinaryEncoder { |
||||
/* |
||||
* tried to avoid using ArrayUtils to minimize dependencies while using these empty arrays - dep is just not worth |
||||
* it. |
||||
*/ |
||||
/** Empty char array. */ |
||||
private static final char[] EMPTY_CHAR_ARRAY = new char[0]; |
||||
|
||||
/** Empty byte array. */ |
||||
private static final byte[] EMPTY_BYTE_ARRAY = new byte[0]; |
||||
|
||||
/** Mask for bit 0 of a byte. */ |
||||
private static final int BIT_0 = 1; |
||||
|
||||
/** Mask for bit 1 of a byte. */ |
||||
private static final int BIT_1 = 0x02; |
||||
|
||||
/** Mask for bit 2 of a byte. */ |
||||
private static final int BIT_2 = 0x04; |
||||
|
||||
/** Mask for bit 3 of a byte. */ |
||||
private static final int BIT_3 = 0x08; |
||||
|
||||
/** Mask for bit 4 of a byte. */ |
||||
private static final int BIT_4 = 0x10; |
||||
|
||||
/** Mask for bit 5 of a byte. */ |
||||
private static final int BIT_5 = 0x20; |
||||
|
||||
/** Mask for bit 6 of a byte. */ |
||||
private static final int BIT_6 = 0x40; |
||||
|
||||
/** Mask for bit 7 of a byte. */ |
||||
private static final int BIT_7 = 0x80; |
||||
|
||||
private static final int[] BITS = {BIT_0, BIT_1, BIT_2, BIT_3, BIT_4, BIT_5, BIT_6, BIT_7}; |
||||
|
||||
/** |
||||
* Converts an array of raw binary data into an array of ASCII 0 and 1 characters. |
||||
* |
||||
* @param raw |
||||
* the raw binary data to convert |
||||
* @return 0 and 1 ASCII character bytes one for each bit of the argument |
||||
* @see BinaryEncoder#encode(byte[]) |
||||
*/ |
||||
@Override |
||||
public byte[] encode(final byte[] raw) { |
||||
return toAsciiBytes(raw); |
||||
} |
||||
|
||||
/** |
||||
* Converts an array of raw binary data into an array of ASCII 0 and 1 chars. |
||||
* |
||||
* @param raw |
||||
* the raw binary data to convert |
||||
* @return 0 and 1 ASCII character chars one for each bit of the argument |
||||
* @throws EncoderException |
||||
* if the argument is not a byte[] |
||||
* @see Encoder#encode(Object) |
||||
*/ |
||||
@Override |
||||
public Object encode(final Object raw) throws EncoderException { |
||||
if (!(raw instanceof byte[])) { |
||||
throw new EncoderException("argument not a byte array"); |
||||
} |
||||
return toAsciiChars((byte[]) raw); |
||||
} |
||||
|
||||
/** |
||||
* Decodes a byte array where each byte represents an ASCII '0' or '1'. |
||||
* |
||||
* @param ascii |
||||
* each byte represents an ASCII '0' or '1' |
||||
* @return the raw encoded binary where each bit corresponds to a byte in the byte array argument |
||||
* @throws DecoderException |
||||
* if argument is not a byte[], char[] or String |
||||
* @see Decoder#decode(Object) |
||||
*/ |
||||
@Override |
||||
public Object decode(final Object ascii) throws DecoderException { |
||||
if (ascii == null) { |
||||
return EMPTY_BYTE_ARRAY; |
||||
} |
||||
if (ascii instanceof byte[]) { |
||||
return fromAscii((byte[]) ascii); |
||||
} |
||||
if (ascii instanceof char[]) { |
||||
return fromAscii((char[]) ascii); |
||||
} |
||||
if (ascii instanceof String) { |
||||
return fromAscii(((String) ascii).toCharArray()); |
||||
} |
||||
throw new DecoderException("argument not a byte array"); |
||||
} |
||||
|
||||
/** |
||||
* Decodes a byte array where each byte represents an ASCII '0' or '1'. |
||||
* |
||||
* @param ascii |
||||
* each byte represents an ASCII '0' or '1' |
||||
* @return the raw encoded binary where each bit corresponds to a byte in the byte array argument |
||||
* @see Decoder#decode(Object) |
||||
*/ |
||||
@Override |
||||
public byte[] decode(final byte[] ascii) { |
||||
return fromAscii(ascii); |
||||
} |
||||
|
||||
/** |
||||
* Decodes a String where each char of the String represents an ASCII '0' or '1'. |
||||
* |
||||
* @param ascii |
||||
* String of '0' and '1' characters |
||||
* @return the raw encoded binary where each bit corresponds to a byte in the byte array argument |
||||
* @see Decoder#decode(Object) |
||||
*/ |
||||
public byte[] toByteArray(final String ascii) { |
||||
if (ascii == null) { |
||||
return EMPTY_BYTE_ARRAY; |
||||
} |
||||
return fromAscii(ascii.toCharArray()); |
||||
} |
||||
|
||||
// ------------------------------------------------------------------------
|
||||
//
|
||||
// static codec operations
|
||||
//
|
||||
// ------------------------------------------------------------------------
|
||||
/** |
||||
* Decodes a char array where each char represents an ASCII '0' or '1'. |
||||
* |
||||
* @param ascii |
||||
* each char represents an ASCII '0' or '1' |
||||
* @return the raw encoded binary where each bit corresponds to a char in the char array argument |
||||
*/ |
||||
public static byte[] fromAscii(final char[] ascii) { |
||||
if (ascii == null || ascii.length == 0) { |
||||
return EMPTY_BYTE_ARRAY; |
||||
} |
||||
// get length/8 times bytes with 3 bit shifts to the right of the length
|
||||
final byte[] l_raw = new byte[ascii.length >> 3]; |
||||
/* |
||||
* We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the |
||||
* loop. |
||||
*/ |
||||
for (int ii = 0, jj = ascii.length - 1; ii < l_raw.length; ii++, jj -= 8) { |
||||
for (int bits = 0; bits < BITS.length; ++bits) { |
||||
if (ascii[jj - bits] == '1') { |
||||
l_raw[ii] |= BITS[bits]; |
||||
} |
||||
} |
||||
} |
||||
return l_raw; |
||||
} |
||||
|
||||
/** |
||||
* Decodes a byte array where each byte represents an ASCII '0' or '1'. |
||||
* |
||||
* @param ascii |
||||
* each byte represents an ASCII '0' or '1' |
||||
* @return the raw encoded binary where each bit corresponds to a byte in the byte array argument |
||||
*/ |
||||
public static byte[] fromAscii(final byte[] ascii) { |
||||
if (isEmpty(ascii)) { |
||||
return EMPTY_BYTE_ARRAY; |
||||
} |
||||
// get length/8 times bytes with 3 bit shifts to the right of the length
|
||||
final byte[] l_raw = new byte[ascii.length >> 3]; |
||||
/* |
||||
* We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the |
||||
* loop. |
||||
*/ |
||||
for (int ii = 0, jj = ascii.length - 1; ii < l_raw.length; ii++, jj -= 8) { |
||||
for (int bits = 0; bits < BITS.length; ++bits) { |
||||
if (ascii[jj - bits] == '1') { |
||||
l_raw[ii] |= BITS[bits]; |
||||
} |
||||
} |
||||
} |
||||
return l_raw; |
||||
} |
||||
|
||||
/** |
||||
* Returns <code>true</code> if the given array is <code>null</code> or empty (size 0.) |
||||
* |
||||
* @param array |
||||
* the source array |
||||
* @return <code>true</code> if the given array is <code>null</code> or empty (size 0.) |
||||
*/ |
||||
private static boolean isEmpty(final byte[] array) { |
||||
return array == null || array.length == 0; |
||||
} |
||||
|
||||
/** |
||||
* Converts an array of raw binary data into an array of ASCII 0 and 1 character bytes - each byte is a truncated |
||||
* char. |
||||
* |
||||
* @param raw |
||||
* the raw binary data to convert |
||||
* @return an array of 0 and 1 character bytes for each bit of the argument |
||||
* @see BinaryEncoder#encode(byte[]) |
||||
*/ |
||||
public static byte[] toAsciiBytes(final byte[] raw) { |
||||
if (isEmpty(raw)) { |
||||
return EMPTY_BYTE_ARRAY; |
||||
} |
||||
// get 8 times the bytes with 3 bit shifts to the left of the length
|
||||
final byte[] l_ascii = new byte[raw.length << 3]; |
||||
/* |
||||
* We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the |
||||
* loop. |
||||
*/ |
||||
for (int ii = 0, jj = l_ascii.length - 1; ii < raw.length; ii++, jj -= 8) { |
||||
for (int bits = 0; bits < BITS.length; ++bits) { |
||||
if ((raw[ii] & BITS[bits]) == 0) { |
||||
l_ascii[jj - bits] = '0'; |
||||
} else { |
||||
l_ascii[jj - bits] = '1'; |
||||
} |
||||
} |
||||
} |
||||
return l_ascii; |
||||
} |
||||
|
||||
/** |
||||
* Converts an array of raw binary data into an array of ASCII 0 and 1 characters. |
||||
* |
||||
* @param raw |
||||
* the raw binary data to convert |
||||
* @return an array of 0 and 1 characters for each bit of the argument |
||||
* @see BinaryEncoder#encode(byte[]) |
||||
*/ |
||||
public static char[] toAsciiChars(final byte[] raw) { |
||||
if (isEmpty(raw)) { |
||||
return EMPTY_CHAR_ARRAY; |
||||
} |
||||
// get 8 times the bytes with 3 bit shifts to the left of the length
|
||||
final char[] l_ascii = new char[raw.length << 3]; |
||||
/* |
||||
* We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the |
||||
* loop. |
||||
*/ |
||||
for (int ii = 0, jj = l_ascii.length - 1; ii < raw.length; ii++, jj -= 8) { |
||||
for (int bits = 0; bits < BITS.length; ++bits) { |
||||
if ((raw[ii] & BITS[bits]) == 0) { |
||||
l_ascii[jj - bits] = '0'; |
||||
} else { |
||||
l_ascii[jj - bits] = '1'; |
||||
} |
||||
} |
||||
} |
||||
return l_ascii; |
||||
} |
||||
|
||||
/** |
||||
* Converts an array of raw binary data into a String of ASCII 0 and 1 characters. |
||||
* |
||||
* @param raw |
||||
* the raw binary data to convert |
||||
* @return a String of 0 and 1 characters representing the binary data |
||||
* @see BinaryEncoder#encode(byte[]) |
||||
*/ |
||||
public static String toAsciiString(final byte[] raw) { |
||||
return new String(toAsciiChars(raw)); |
||||
} |
||||
} |
@ -0,0 +1,79 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
package com.fr.third.org.apache.commons.codec.binary; |
||||
|
||||
/** |
||||
* <p> |
||||
* Operations on {@link CharSequence} that are <code>null</code> safe. |
||||
* </p> |
||||
* <p> |
||||
* Copied from Apache Commons Lang r1586295 on April 10, 2014 (day of 3.3.2 release). |
||||
* </p> |
||||
* |
||||
* @see CharSequence |
||||
* @since 1.10 |
||||
*/ |
||||
public class CharSequenceUtils {

    /**
     * Green implementation of regionMatches.
     * <p>
     * When both arguments are Strings this delegates to
     * {@link String#regionMatches(boolean, int, String, int, int)}. For any other
     * <code>CharSequence</code> the same contract is applied: a negative start index, or a region that
     * extends past the end of either sequence, yields <code>false</code> rather than an
     * {@link IndexOutOfBoundsException}.
     * </p>
     *
     * @param cs
     *            the <code>CharSequence</code> to be processed
     * @param ignoreCase
     *            whether or not to be case insensitive
     * @param thisStart
     *            the index to start on the <code>cs</code> CharSequence
     * @param substring
     *            the <code>CharSequence</code> to be looked for
     * @param start
     *            the index to start on the <code>substring</code> CharSequence
     * @param length
     *            character length of the region
     * @return whether the region matched
     */
    static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart,
            final CharSequence substring, final int start, final int length) {
        if (cs instanceof String && substring instanceof String) {
            return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length);
        }

        // Same range checks as String.regionMatches(); long arithmetic avoids int overflow
        // when length is very large or negative.
        if (thisStart < 0 || start < 0
                || thisStart > (long) cs.length() - length
                || start > (long) substring.length() - length) {
            return false;
        }

        int index1 = thisStart;
        int index2 = start;
        int tmpLen = length;

        while (tmpLen-- > 0) {
            final char c1 = cs.charAt(index1++);
            final char c2 = substring.charAt(index2++);

            if (c1 == c2) {
                continue;
            }

            if (!ignoreCase) {
                return false;
            }

            // The same check as in String.regionMatches():
            if (Character.toUpperCase(c1) != Character.toUpperCase(c2) &&
                    Character.toLowerCase(c1) != Character.toLowerCase(c2)) {
                return false;
            }
        }

        return true;
    }
}
@ -0,0 +1,491 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.binary; |
||||
|
||||
import java.nio.ByteBuffer; |
||||
import java.nio.charset.Charset; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.BinaryDecoder; |
||||
import com.fr.third.org.apache.commons.codec.BinaryEncoder; |
||||
import com.fr.third.org.apache.commons.codec.CharEncoding; |
||||
import com.fr.third.org.apache.commons.codec.Charsets; |
||||
import com.fr.third.org.apache.commons.codec.DecoderException; |
||||
import com.fr.third.org.apache.commons.codec.EncoderException; |
||||
|
||||
/** |
||||
* Converts hexadecimal Strings. The charset used for certain operation can be set, the default is set in |
||||
* {@link #DEFAULT_CHARSET_NAME} |
||||
* |
||||
* This class is thread-safe. |
||||
* |
||||
* @since 1.1 |
||||
* @version $Id: Hex.java 1811344 2017-10-06 15:19:57Z ggregory $ |
||||
*/ |
||||
public class Hex implements BinaryEncoder, BinaryDecoder { |
||||
|
||||
/** |
||||
* Default charset is {@link Charsets#UTF_8} |
||||
* |
||||
* @since 1.7 |
||||
*/ |
||||
public static final Charset DEFAULT_CHARSET = Charsets.UTF_8; |
||||
|
||||
/** |
||||
* Default charset name is {@link CharEncoding#UTF_8} |
||||
* |
||||
* @since 1.4 |
||||
*/ |
||||
public static final String DEFAULT_CHARSET_NAME = CharEncoding.UTF_8; |
||||
|
||||
/** |
||||
* Used to build output as Hex |
||||
*/ |
||||
private static final char[] DIGITS_LOWER = |
||||
{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; |
||||
|
||||
/** |
||||
* Used to build output as Hex |
||||
*/ |
||||
private static final char[] DIGITS_UPPER = |
||||
{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; |
||||
|
||||
/** |
||||
* Converts a String representing hexadecimal values into an array of bytes of those same values. The |
||||
* returned array will be half the length of the passed String, as it takes two characters to represent any given |
||||
* byte. An exception is thrown if the passed String has an odd number of elements. |
||||
* |
||||
* @param data |
||||
* A String containing hexadecimal digits |
||||
* @return A byte array containing binary data decoded from the supplied char array. |
||||
* @throws DecoderException |
||||
* Thrown if an odd number or illegal of characters is supplied |
||||
* @since 1.11 |
||||
*/ |
||||
public static byte[] decodeHex(final String data) throws DecoderException { |
||||
return decodeHex(data.toCharArray()); |
||||
} |
||||
|
||||
/** |
||||
* Converts an array of characters representing hexadecimal values into an array of bytes of those same values. The |
||||
* returned array will be half the length of the passed array, as it takes two characters to represent any given |
||||
* byte. An exception is thrown if the passed char array has an odd number of elements. |
||||
* |
||||
* @param data |
||||
* An array of characters containing hexadecimal digits |
||||
* @return A byte array containing binary data decoded from the supplied char array. |
||||
* @throws DecoderException |
||||
* Thrown if an odd number or illegal of characters is supplied |
||||
*/ |
||||
public static byte[] decodeHex(final char[] data) throws DecoderException { |
||||
|
||||
final int len = data.length; |
||||
|
||||
if ((len & 0x01) != 0) { |
||||
throw new DecoderException("Odd number of characters."); |
||||
} |
||||
|
||||
final byte[] out = new byte[len >> 1]; |
||||
|
||||
// two characters form the hex value.
|
||||
for (int i = 0, j = 0; j < len; i++) { |
||||
int f = toDigit(data[j], j) << 4; |
||||
j++; |
||||
f = f | toDigit(data[j], j); |
||||
j++; |
||||
out[i] = (byte) (f & 0xFF); |
||||
} |
||||
|
||||
return out; |
||||
} |
||||
|
||||
/** |
||||
* Converts an array of bytes into an array of characters representing the hexadecimal values of each byte in order. |
||||
* The returned array will be double the length of the passed array, as it takes two characters to represent any |
||||
* given byte. |
||||
* |
||||
* @param data |
||||
* a byte[] to convert to Hex characters |
||||
* @return A char[] containing lower-case hexadecimal characters |
||||
*/ |
||||
public static char[] encodeHex(final byte[] data) { |
||||
return encodeHex(data, true); |
||||
} |
||||
|
||||
/** |
||||
* Converts a byte buffer into an array of characters representing the hexadecimal values of each byte in order. |
||||
* The returned array will be double the length of the passed array, as it takes two characters to represent any |
||||
* given byte. |
||||
* |
||||
* @param data |
||||
* a byte buffer to convert to Hex characters |
||||
* @return A char[] containing lower-case hexadecimal characters |
||||
* @since 1.11 |
||||
*/ |
||||
public static char[] encodeHex(final ByteBuffer data) { |
||||
return encodeHex(data, true); |
||||
} |
||||
|
||||
/** |
||||
* Converts an array of bytes into an array of characters representing the hexadecimal values of each byte in order. |
||||
* The returned array will be double the length of the passed array, as it takes two characters to represent any |
||||
* given byte. |
||||
* |
||||
* @param data |
||||
* a byte[] to convert to Hex characters |
||||
* @param toLowerCase |
||||
* <code>true</code> converts to lowercase, <code>false</code> to uppercase |
||||
* @return A char[] containing hexadecimal characters in the selected case |
||||
* @since 1.4 |
||||
*/ |
||||
public static char[] encodeHex(final byte[] data, final boolean toLowerCase) { |
||||
return encodeHex(data, toLowerCase ? DIGITS_LOWER : DIGITS_UPPER); |
||||
} |
||||
|
||||
/** |
||||
* Converts a byte buffer into an array of characters representing the hexadecimal values of each byte in order. |
||||
* The returned array will be double the length of the passed array, as it takes two characters to represent any |
||||
* given byte. |
||||
* |
||||
* @param data |
||||
* a byte buffer to convert to Hex characters |
||||
* @param toLowerCase |
||||
* <code>true</code> converts to lowercase, <code>false</code> to uppercase |
||||
* @return A char[] containing hexadecimal characters in the selected case |
||||
* @since 1.11 |
||||
*/ |
||||
public static char[] encodeHex(final ByteBuffer data, final boolean toLowerCase) { |
||||
return encodeHex(data, toLowerCase ? DIGITS_LOWER : DIGITS_UPPER); |
||||
} |
||||
|
||||
/** |
||||
* Converts an array of bytes into an array of characters representing the hexadecimal values of each byte in order. |
||||
* The returned array will be double the length of the passed array, as it takes two characters to represent any |
||||
* given byte. |
||||
* |
||||
* @param data |
||||
* a byte[] to convert to Hex characters |
||||
* @param toDigits |
||||
* the output alphabet (must contain at least 16 chars) |
||||
* @return A char[] containing the appropriate characters from the alphabet |
||||
* For best results, this should be either upper- or lower-case hex. |
||||
* @since 1.4 |
||||
*/ |
||||
protected static char[] encodeHex(final byte[] data, final char[] toDigits) { |
||||
final int l = data.length; |
||||
final char[] out = new char[l << 1]; |
||||
// two characters form the hex value.
|
||||
for (int i = 0, j = 0; i < l; i++) { |
||||
out[j++] = toDigits[(0xF0 & data[i]) >>> 4]; |
||||
out[j++] = toDigits[0x0F & data[i]]; |
||||
} |
||||
return out; |
||||
} |
||||
|
||||
/** |
||||
* Converts a byte buffer into an array of characters representing the hexadecimal values of each byte in order. |
||||
* The returned array will be double the length of the passed array, as it takes two characters to represent any |
||||
* given byte. |
||||
* |
||||
* @param data |
||||
* a byte buffer to convert to Hex characters |
||||
* @param toDigits |
||||
* the output alphabet (must be at least 16 characters) |
||||
* @return A char[] containing the appropriate characters from the alphabet |
||||
* For best results, this should be either upper- or lower-case hex. |
||||
* @since 1.11 |
||||
*/ |
||||
protected static char[] encodeHex(final ByteBuffer data, final char[] toDigits) { |
||||
return encodeHex(data.array(), toDigits); |
||||
} |
||||
|
||||
/** |
||||
* Converts an array of bytes into a String representing the hexadecimal values of each byte in order. The returned |
||||
* String will be double the length of the passed array, as it takes two characters to represent any given byte. |
||||
* |
||||
* @param data |
||||
* a byte[] to convert to Hex characters |
||||
* @return A String containing lower-case hexadecimal characters |
||||
* @since 1.4 |
||||
*/ |
||||
public static String encodeHexString(final byte[] data) { |
||||
return new String(encodeHex(data)); |
||||
} |
||||
|
||||
/** |
||||
* Converts an array of bytes into a String representing the hexadecimal values of each byte in order. The returned |
||||
* String will be double the length of the passed array, as it takes two characters to represent any given byte. |
||||
* |
||||
* @param data |
||||
* a byte[] to convert to Hex characters |
||||
* @param toLowerCase |
||||
* <code>true</code> converts to lowercase, <code>false</code> to uppercase |
||||
* @return A String containing lower-case hexadecimal characters |
||||
* @since 1.11 |
||||
*/ |
||||
public static String encodeHexString(final byte[] data, final boolean toLowerCase) { |
||||
return new String(encodeHex(data, toLowerCase)); |
||||
} |
||||
|
||||
/** |
||||
* Converts a byte buffer into a String representing the hexadecimal values of each byte in order. The returned |
||||
* String will be double the length of the passed array, as it takes two characters to represent any given byte. |
||||
* |
||||
* @param data |
||||
* a byte buffer to convert to Hex characters |
||||
* @return A String containing lower-case hexadecimal characters |
||||
* @since 1.11 |
||||
*/ |
||||
public static String encodeHexString(final ByteBuffer data) { |
||||
return new String(encodeHex(data)); |
||||
} |
||||
|
||||
/** |
||||
* Converts a byte buffer into a String representing the hexadecimal values of each byte in order. The returned |
||||
* String will be double the length of the passed array, as it takes two characters to represent any given byte. |
||||
* |
||||
* @param data |
||||
* a byte buffer to convert to Hex characters |
||||
* @param toLowerCase |
||||
* <code>true</code> converts to lowercase, <code>false</code> to uppercase |
||||
* @return A String containing lower-case hexadecimal characters |
||||
* @since 1.11 |
||||
*/ |
||||
public static String encodeHexString(final ByteBuffer data, final boolean toLowerCase) { |
||||
return new String(encodeHex(data, toLowerCase)); |
||||
} |
||||
|
||||
/** |
||||
* Converts a hexadecimal character to an integer. |
||||
* |
||||
* @param ch |
||||
* A character to convert to an integer digit |
||||
* @param index |
||||
* The index of the character in the source |
||||
* @return An integer |
||||
* @throws DecoderException |
||||
* Thrown if ch is an illegal hex character |
||||
*/ |
||||
protected static int toDigit(final char ch, final int index) throws DecoderException { |
||||
final int digit = Character.digit(ch, 16); |
||||
if (digit == -1) { |
||||
throw new DecoderException("Illegal hexadecimal character " + ch + " at index " + index); |
||||
} |
||||
return digit; |
||||
} |
||||
|
||||
private final Charset charset; |
||||
|
||||
/** |
||||
* Creates a new codec with the default charset name {@link #DEFAULT_CHARSET} |
||||
*/ |
||||
public Hex() { |
||||
// use default encoding
|
||||
this.charset = DEFAULT_CHARSET; |
||||
} |
||||
|
||||
/** |
||||
* Creates a new codec with the given Charset. |
||||
* |
||||
* @param charset |
||||
* the charset. |
||||
* @since 1.7 |
||||
*/ |
||||
public Hex(final Charset charset) { |
||||
this.charset = charset; |
||||
} |
||||
|
||||
/** |
||||
* Creates a new codec with the given charset name. |
||||
* |
||||
* @param charsetName |
||||
* the charset name. |
||||
* @throws java.nio.charset.UnsupportedCharsetException |
||||
* If the named charset is unavailable |
||||
* @since 1.4 |
||||
* @since 1.7 throws UnsupportedCharsetException if the named charset is unavailable |
||||
*/ |
||||
public Hex(final String charsetName) { |
||||
this(Charset.forName(charsetName)); |
||||
} |
||||
|
||||
/** |
||||
* Converts an array of character bytes representing hexadecimal values into an array of bytes of those same values. |
||||
* The returned array will be half the length of the passed array, as it takes two characters to represent any given |
||||
* byte. An exception is thrown if the passed char array has an odd number of elements. |
||||
* |
||||
* @param array |
||||
* An array of character bytes containing hexadecimal digits |
||||
* @return A byte array containing binary data decoded from the supplied byte array (representing characters). |
||||
* @throws DecoderException |
||||
* Thrown if an odd number of characters is supplied to this function |
||||
* @see #decodeHex(char[]) |
||||
*/ |
||||
@Override |
||||
public byte[] decode(final byte[] array) throws DecoderException { |
||||
return decodeHex(new String(array, getCharset()).toCharArray()); |
||||
} |
||||
|
||||
/** |
||||
* Converts a buffer of character bytes representing hexadecimal values into an array of bytes of those same values. |
||||
* The returned array will be half the length of the passed array, as it takes two characters to represent any given |
||||
* byte. An exception is thrown if the passed char array has an odd number of elements. |
||||
* |
||||
* @param buffer |
||||
* An array of character bytes containing hexadecimal digits |
||||
* @return A byte array containing binary data decoded from the supplied byte array (representing characters). |
||||
* @throws DecoderException |
||||
* Thrown if an odd number of characters is supplied to this function |
||||
* @see #decodeHex(char[]) |
||||
* @since 1.11 |
||||
*/ |
||||
public byte[] decode(final ByteBuffer buffer) throws DecoderException { |
||||
return decodeHex(new String(buffer.array(), getCharset()).toCharArray()); |
||||
} |
||||
|
||||
/** |
||||
* Converts a String or an array of character bytes representing hexadecimal values into an array of bytes of those |
||||
* same values. The returned array will be half the length of the passed String or array, as it takes two characters |
||||
* to represent any given byte. An exception is thrown if the passed char array has an odd number of elements. |
||||
* |
||||
* @param object |
||||
* A String, ByteBuffer, byte[], or an array of character bytes containing hexadecimal digits |
||||
* @return A byte array containing binary data decoded from the supplied byte array (representing characters). |
||||
* @throws DecoderException |
||||
* Thrown if an odd number of characters is supplied to this function or the object is not a String or |
||||
* char[] |
||||
* @see #decodeHex(char[]) |
||||
*/ |
||||
@Override |
||||
public Object decode(final Object object) throws DecoderException { |
||||
if (object instanceof String) { |
||||
return decode(((String) object).toCharArray()); |
||||
} else if (object instanceof byte[]) { |
||||
return decode((byte[]) object); |
||||
} else if (object instanceof ByteBuffer) { |
||||
return decode((ByteBuffer) object); |
||||
} else { |
||||
try { |
||||
return decodeHex((char[]) object); |
||||
} catch (final ClassCastException e) { |
||||
throw new DecoderException(e.getMessage(), e); |
||||
} |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Converts an array of bytes into an array of bytes for the characters representing the hexadecimal values of each |
||||
* byte in order. The returned array will be double the length of the passed array, as it takes two characters to |
||||
* represent any given byte. |
||||
* <p> |
||||
* The conversion from hexadecimal characters to the returned bytes is performed with the charset named by |
||||
* {@link #getCharset()}. |
||||
* </p> |
||||
* |
||||
* @param array |
||||
* a byte[] to convert to Hex characters |
||||
* @return A byte[] containing the bytes of the lower-case hexadecimal characters |
||||
* @since 1.7 No longer throws IllegalStateException if the charsetName is invalid. |
||||
* @see #encodeHex(byte[]) |
||||
*/ |
||||
@Override |
||||
public byte[] encode(final byte[] array) { |
||||
return encodeHexString(array).getBytes(this.getCharset()); |
||||
} |
||||
|
||||
/** |
||||
* Converts byte buffer into an array of bytes for the characters representing the hexadecimal values of each |
||||
* byte in order. The returned array will be double the length of the passed array, as it takes two characters to |
||||
* represent any given byte. |
||||
* <p> |
||||
* The conversion from hexadecimal characters to the returned bytes is performed with the charset named by |
||||
* {@link #getCharset()}. |
||||
* </p> |
||||
* |
||||
* @param array |
||||
* a byte buffer to convert to Hex characters |
||||
* @return A byte[] containing the bytes of the lower-case hexadecimal characters |
||||
* @see #encodeHex(byte[]) |
||||
* @since 1.11 |
||||
*/ |
||||
public byte[] encode(final ByteBuffer array) { |
||||
return encodeHexString(array).getBytes(this.getCharset()); |
||||
} |
||||
|
||||
/** |
||||
* Converts a String or an array of bytes into an array of characters representing the hexadecimal values of each |
||||
* byte in order. The returned array will be double the length of the passed String or array, as it takes two |
||||
* characters to represent any given byte. |
||||
* <p> |
||||
* The conversion from hexadecimal characters to bytes to be encoded to performed with the charset named by |
||||
* {@link #getCharset()}. |
||||
* </p> |
||||
* |
||||
* @param object |
||||
* a String, ByteBuffer, or byte[] to convert to Hex characters |
||||
* @return A char[] containing lower-case hexadecimal characters |
||||
* @throws EncoderException |
||||
* Thrown if the given object is not a String or byte[] |
||||
* @see #encodeHex(byte[]) |
||||
*/ |
||||
@Override |
||||
public Object encode(final Object object) throws EncoderException { |
||||
byte[] byteArray; |
||||
if (object instanceof String) { |
||||
byteArray = ((String) object).getBytes(this.getCharset()); |
||||
} else if (object instanceof ByteBuffer) { |
||||
byteArray = ((ByteBuffer) object).array(); |
||||
} else { |
||||
try { |
||||
byteArray = (byte[]) object; |
||||
} catch (final ClassCastException e) { |
||||
throw new EncoderException(e.getMessage(), e); |
||||
} |
||||
} |
||||
return encodeHex(byteArray); |
||||
} |
||||
|
||||
/** |
||||
* Gets the charset. |
||||
* |
||||
* @return the charset. |
||||
* @since 1.7 |
||||
*/ |
||||
public Charset getCharset() { |
||||
return this.charset; |
||||
} |
||||
|
||||
/** |
||||
* Gets the charset name. |
||||
* |
||||
* @return the charset name. |
||||
* @since 1.4 |
||||
*/ |
||||
public String getCharsetName() { |
||||
return this.charset.name(); |
||||
} |
||||
|
||||
/** |
||||
* Returns a string representation of the object, which includes the charset name. |
||||
* |
||||
* @return a string representation of the object. |
||||
*/ |
||||
@Override |
||||
public String toString() { |
||||
return super.toString() + "[charsetName=" + this.charset + "]"; |
||||
} |
||||
} |
@ -0,0 +1,420 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.binary; |
||||
|
||||
import java.io.UnsupportedEncodingException; |
||||
import java.nio.ByteBuffer; |
||||
import java.nio.charset.Charset; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.CharEncoding; |
||||
import com.fr.third.org.apache.commons.codec.Charsets; |
||||
|
||||
/** |
||||
* Converts String to and from bytes using the encodings required by the Java specification. These encodings are |
||||
* specified in <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html"> |
||||
* Standard charsets</a>. |
||||
* |
||||
* <p>This class is immutable and thread-safe.</p> |
||||
* |
||||
* @see CharEncoding |
||||
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> |
||||
* @version $Id: StringUtils.java 1789539 2017-03-30 16:36:28Z sebb $ |
||||
* @since 1.4 |
||||
*/ |
||||
public class StringUtils { |
||||
|
||||
/** |
||||
* <p> |
||||
* Compares two CharSequences, returning <code>true</code> if they represent equal sequences of characters. |
||||
* </p> |
||||
* |
||||
* <p> |
||||
* <code>null</code>s are handled without exceptions. Two <code>null</code> references are considered to be equal. |
||||
* The comparison is case sensitive. |
||||
* </p> |
||||
* |
||||
* <pre> |
||||
* StringUtils.equals(null, null) = true |
||||
* StringUtils.equals(null, "abc") = false |
||||
* StringUtils.equals("abc", null) = false |
||||
* StringUtils.equals("abc", "abc") = true |
||||
* StringUtils.equals("abc", "ABC") = false |
||||
* </pre> |
||||
* |
||||
* <p> |
||||
* Copied from Apache Commons Lang r1583482 on April 10, 2014 (day of 3.3.2 release). |
||||
* </p> |
||||
* |
||||
* @see Object#equals(Object) |
||||
* @param cs1 |
||||
* the first CharSequence, may be <code>null</code> |
||||
* @param cs2 |
||||
* the second CharSequence, may be <code>null</code> |
||||
* @return <code>true</code> if the CharSequences are equal (case-sensitive), or both <code>null</code> |
||||
* @since 1.10 |
||||
*/ |
||||
public static boolean equals(final CharSequence cs1, final CharSequence cs2) { |
||||
if (cs1 == cs2) { |
||||
return true; |
||||
} |
||||
if (cs1 == null || cs2 == null) { |
||||
return false; |
||||
} |
||||
if (cs1 instanceof String && cs2 instanceof String) { |
||||
return cs1.equals(cs2); |
||||
} |
||||
return cs1.length() == cs2.length() && CharSequenceUtils.regionMatches(cs1, false, 0, cs2, 0, cs1.length()); |
||||
} |
||||
|
||||
/** |
||||
* Calls {@link String#getBytes(Charset)} |
||||
* |
||||
* @param string |
||||
* The string to encode (if null, return null). |
||||
* @param charset |
||||
* The {@link Charset} to encode the <code>String</code> |
||||
* @return the encoded bytes |
||||
*/ |
||||
private static byte[] getBytes(final String string, final Charset charset) { |
||||
if (string == null) { |
||||
return null; |
||||
} |
||||
return string.getBytes(charset); |
||||
} |
||||
|
||||
/** |
||||
* Calls {@link String#getBytes(Charset)} |
||||
* |
||||
* @param string |
||||
* The string to encode (if null, return null). |
||||
* @param charset |
||||
* The {@link Charset} to encode the <code>String</code> |
||||
* @return the encoded bytes |
||||
*/ |
||||
private static ByteBuffer getByteBuffer(final String string, final Charset charset) { |
||||
if (string == null) { |
||||
return null; |
||||
} |
||||
return ByteBuffer.wrap(string.getBytes(charset)); |
||||
} |
||||
|
||||
/** |
||||
* Encodes the given string into a byte buffer using the UTF-8 charset, storing the result into a new byte |
||||
* array. |
||||
* |
||||
* @param string |
||||
* the String to encode, may be <code>null</code> |
||||
* @return encoded bytes, or <code>null</code> if the input string was <code>null</code> |
||||
* @throws NullPointerException |
||||
* Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is |
||||
* required by the Java platform specification. |
||||
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> |
||||
* @see #getBytesUnchecked(String, String) |
||||
* @since 1.11 |
||||
*/ |
||||
public static ByteBuffer getByteBufferUtf8(final String string) { |
||||
return getByteBuffer(string, Charsets.UTF_8); |
||||
} |
||||
|
||||
/** |
||||
* Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new |
||||
* byte array. |
||||
* |
||||
* @param string |
||||
* the String to encode, may be <code>null</code> |
||||
* @return encoded bytes, or <code>null</code> if the input string was <code>null</code> |
||||
* @throws NullPointerException |
||||
* Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is |
||||
* required by the Java platform specification. |
||||
* @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException |
||||
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> |
||||
* @see #getBytesUnchecked(String, String) |
||||
*/ |
||||
public static byte[] getBytesIso8859_1(final String string) { |
||||
return getBytes(string, Charsets.ISO_8859_1); |
||||
} |
||||
|
||||
|
||||
/** |
||||
* Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte |
||||
* array. |
||||
* <p> |
||||
* This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which |
||||
* should never happen for a required charset name. Use this method when the encoding is required to be in the JRE. |
||||
* </p> |
||||
* |
||||
* @param string |
||||
* the String to encode, may be <code>null</code> |
||||
* @param charsetName |
||||
* The name of a required {@link Charset} |
||||
* @return encoded bytes, or <code>null</code> if the input string was <code>null</code> |
||||
* @throws IllegalStateException |
||||
* Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a |
||||
* required charset name. |
||||
* @see CharEncoding |
||||
* @see String#getBytes(String) |
||||
*/ |
||||
public static byte[] getBytesUnchecked(final String string, final String charsetName) { |
||||
if (string == null) { |
||||
return null; |
||||
} |
||||
try { |
||||
return string.getBytes(charsetName); |
||||
} catch (final UnsupportedEncodingException e) { |
||||
throw StringUtils.newIllegalStateException(charsetName, e); |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte |
||||
* array. |
||||
* |
||||
* @param string |
||||
* the String to encode, may be <code>null</code> |
||||
* @return encoded bytes, or <code>null</code> if the input string was <code>null</code> |
||||
* @throws NullPointerException |
||||
* Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is |
||||
* required by the Java platform specification. |
||||
* @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException |
||||
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> |
||||
* @see #getBytesUnchecked(String, String) |
||||
*/ |
||||
public static byte[] getBytesUsAscii(final String string) { |
||||
return getBytes(string, Charsets.US_ASCII); |
||||
} |
||||
|
||||
/** |
||||
* Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte |
||||
* array. |
||||
* |
||||
* @param string |
||||
* the String to encode, may be <code>null</code> |
||||
* @return encoded bytes, or <code>null</code> if the input string was <code>null</code> |
||||
* @throws NullPointerException |
||||
* Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is |
||||
* required by the Java platform specification. |
||||
* @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException |
||||
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> |
||||
* @see #getBytesUnchecked(String, String) |
||||
*/ |
||||
public static byte[] getBytesUtf16(final String string) { |
||||
return getBytes(string, Charsets.UTF_16); |
||||
} |
||||
|
||||
/** |
||||
* Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte |
||||
* array. |
||||
* |
||||
* @param string |
||||
* the String to encode, may be <code>null</code> |
||||
* @return encoded bytes, or <code>null</code> if the input string was <code>null</code> |
||||
* @throws NullPointerException |
||||
* Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is |
||||
* required by the Java platform specification. |
||||
* @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException |
||||
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> |
||||
* @see #getBytesUnchecked(String, String) |
||||
*/ |
||||
public static byte[] getBytesUtf16Be(final String string) { |
||||
return getBytes(string, Charsets.UTF_16BE); |
||||
} |
||||
|
||||
/** |
||||
* Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte |
||||
* array. |
||||
* |
||||
* @param string |
||||
* the String to encode, may be <code>null</code> |
||||
* @return encoded bytes, or <code>null</code> if the input string was <code>null</code> |
||||
* @throws NullPointerException |
||||
* Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is |
||||
* required by the Java platform specification. |
||||
* @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException |
||||
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> |
||||
* @see #getBytesUnchecked(String, String) |
||||
*/ |
||||
public static byte[] getBytesUtf16Le(final String string) { |
||||
return getBytes(string, Charsets.UTF_16LE); |
||||
} |
||||
|
||||
/** |
||||
* Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte |
||||
* array. |
||||
* |
||||
* @param string |
||||
* the String to encode, may be <code>null</code> |
||||
* @return encoded bytes, or <code>null</code> if the input string was <code>null</code> |
||||
* @throws NullPointerException |
||||
* Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is |
||||
* required by the Java platform specification. |
||||
* @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException |
||||
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> |
||||
* @see #getBytesUnchecked(String, String) |
||||
*/ |
||||
public static byte[] getBytesUtf8(final String string) { |
||||
return getBytes(string, Charsets.UTF_8); |
||||
} |
||||
|
||||
private static IllegalStateException newIllegalStateException(final String charsetName, |
||||
final UnsupportedEncodingException e) { |
||||
return new IllegalStateException(charsetName + ": " + e); |
||||
} |
||||
|
||||
/** |
||||
* Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset. |
||||
* |
||||
* @param bytes |
||||
* The bytes to be decoded into characters |
||||
* @param charset |
||||
* The {@link Charset} to encode the <code>String</code>; not {@code null} |
||||
* @return A new <code>String</code> decoded from the specified array of bytes using the given charset, |
||||
* or <code>null</code> if the input byte array was <code>null</code>. |
||||
* @throws NullPointerException |
||||
* Thrown if charset is {@code null} |
||||
*/ |
||||
private static String newString(final byte[] bytes, final Charset charset) { |
||||
return bytes == null ? null : new String(bytes, charset); |
||||
} |
||||
|
||||
/** |
||||
* Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset. |
||||
* <p> |
||||
* This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which |
||||
* should never happen for a required charset name. Use this method when the encoding is required to be in the JRE. |
||||
* </p> |
||||
* |
||||
* @param bytes |
||||
* The bytes to be decoded into characters, may be <code>null</code> |
||||
* @param charsetName |
||||
* The name of a required {@link Charset} |
||||
* @return A new <code>String</code> decoded from the specified array of bytes using the given charset, |
||||
* or <code>null</code> if the input byte array was <code>null</code>. |
||||
* @throws IllegalStateException |
||||
* Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a |
||||
* required charset name. |
||||
* @see CharEncoding |
||||
* @see String#String(byte[], String) |
||||
*/ |
||||
public static String newString(final byte[] bytes, final String charsetName) { |
||||
if (bytes == null) { |
||||
return null; |
||||
} |
||||
try { |
||||
return new String(bytes, charsetName); |
||||
} catch (final UnsupportedEncodingException e) { |
||||
throw StringUtils.newIllegalStateException(charsetName, e); |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset. |
||||
* |
||||
* @param bytes |
||||
* The bytes to be decoded into characters, may be <code>null</code> |
||||
* @return A new <code>String</code> decoded from the specified array of bytes using the ISO-8859-1 charset, or |
||||
* <code>null</code> if the input byte array was <code>null</code>. |
||||
* @throws NullPointerException |
||||
* Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is |
||||
* required by the Java platform specification. |
||||
* @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException |
||||
*/ |
||||
public static String newStringIso8859_1(final byte[] bytes) { |
||||
return newString(bytes, Charsets.ISO_8859_1); |
||||
} |
||||
|
||||
/** |
||||
* Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset. |
||||
* |
||||
* @param bytes |
||||
* The bytes to be decoded into characters |
||||
* @return A new <code>String</code> decoded from the specified array of bytes using the US-ASCII charset, |
||||
* or <code>null</code> if the input byte array was <code>null</code>. |
||||
* @throws NullPointerException |
||||
* Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is |
||||
* required by the Java platform specification. |
||||
* @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException |
||||
*/ |
||||
public static String newStringUsAscii(final byte[] bytes) { |
||||
return newString(bytes, Charsets.US_ASCII); |
||||
} |
||||
|
||||
/** |
||||
* Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset. |
||||
* |
||||
* @param bytes |
||||
* The bytes to be decoded into characters |
||||
* @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16 charset |
||||
* or <code>null</code> if the input byte array was <code>null</code>. |
||||
* @throws NullPointerException |
||||
* Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is |
||||
* required by the Java platform specification. |
||||
* @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException |
||||
*/ |
||||
public static String newStringUtf16(final byte[] bytes) { |
||||
return newString(bytes, Charsets.UTF_16); |
||||
} |
||||
|
||||
/** |
||||
* Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset. |
||||
* |
||||
* @param bytes |
||||
* The bytes to be decoded into characters |
||||
* @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16BE charset, |
||||
* or <code>null</code> if the input byte array was <code>null</code>. |
||||
* @throws NullPointerException |
||||
* Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is |
||||
* required by the Java platform specification. |
||||
* @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException |
||||
*/ |
||||
public static String newStringUtf16Be(final byte[] bytes) { |
||||
return newString(bytes, Charsets.UTF_16BE); |
||||
} |
||||
|
||||
/** |
||||
* Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset. |
||||
* |
||||
* @param bytes |
||||
* The bytes to be decoded into characters |
||||
* @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16LE charset, |
||||
* or <code>null</code> if the input byte array was <code>null</code>. |
||||
* @throws NullPointerException |
||||
* Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is |
||||
* required by the Java platform specification. |
||||
* @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException |
||||
*/ |
||||
public static String newStringUtf16Le(final byte[] bytes) { |
||||
return newString(bytes, Charsets.UTF_16LE); |
||||
} |
||||
|
||||
/** |
||||
* Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset. |
||||
* |
||||
* @param bytes |
||||
* The bytes to be decoded into characters |
||||
* @return A new <code>String</code> decoded from the specified array of bytes using the UTF-8 charset, |
||||
* or <code>null</code> if the input byte array was <code>null</code>. |
||||
* @throws NullPointerException |
||||
* Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is |
||||
* required by the Java platform specification. |
||||
* @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException |
||||
*/ |
||||
public static String newStringUtf8(final byte[] bytes) { |
||||
return newString(bytes, Charsets.UTF_8); |
||||
} |
||||
|
||||
} |
@ -0,0 +1,21 @@
|
||||
<!-- |
||||
Licensed to the Apache Software Foundation (ASF) under one or more |
||||
contributor license agreements. See the NOTICE file distributed with |
||||
this work for additional information regarding copyright ownership. |
||||
The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
(the "License"); you may not use this file except in compliance with |
||||
the License. You may obtain a copy of the License at |
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
||||
Unless required by applicable law or agreed to in writing, software |
||||
distributed under the License is distributed on an "AS IS" BASIS, |
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
See the License for the specific language governing permissions and |
||||
limitations under the License. |
||||
--> |
||||
<html> |
||||
<body> |
||||
Base64, Base32, Binary, and Hexadecimal String encoding and decoding. |
||||
</body> |
||||
</html> |
@ -0,0 +1,150 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
package com.fr.third.org.apache.commons.codec.cli; |
||||
|
||||
import java.io.File; |
||||
import java.io.IOException; |
||||
import java.nio.charset.Charset; |
||||
import java.security.MessageDigest; |
||||
import java.util.Arrays; |
||||
import java.util.Locale; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.binary.Hex; |
||||
import com.fr.third.org.apache.commons.codec.digest.DigestUtils; |
||||
import com.fr.third.org.apache.commons.codec.digest.MessageDigestAlgorithms; |
||||
|
||||
/** |
||||
* A minimal command line to run digest over files, directories or a string |
||||
* |
||||
* @see #main(String[]) |
||||
* @since 1.11 |
||||
*/ |
||||
public class Digest { |
||||
|
||||
/** |
||||
* Runs the digest algorithm in {@code args[0]} on the file in {@code args[1]}. If there is no {@code args[1]}, use |
||||
* standard input. |
||||
* |
||||
* <p> |
||||
* The algorithm can also be {@code ALL} or {@code *} to output one line for each known algorithm. |
||||
* </p> |
||||
* |
||||
* @param args |
||||
* {@code args[0]} is one of {@link MessageDigestAlgorithms} name, |
||||
* {@link MessageDigest} name, {@code ALL}, or {@code *}. |
||||
* {@code args[1+]} is a FILE/DIRECTORY/String. |
||||
* @throws IOException if an error occurs |
||||
*/ |
||||
public static void main(final String[] args) throws IOException { |
||||
new Digest(args).run(); |
||||
} |
||||
|
||||
private final String algorithm; |
||||
private final String[] args; |
||||
private final String[] inputs; |
||||
|
||||
private Digest(final String[] args) { |
||||
if (args == null) { |
||||
throw new IllegalArgumentException("args"); |
||||
} |
||||
if (args.length == 0) { |
||||
throw new IllegalArgumentException( |
||||
String.format("Usage: java %s [algorithm] [FILE|DIRECTORY|string] ...", Digest.class.getName())); |
||||
} |
||||
this.args = args; |
||||
algorithm = args[0]; |
||||
if (args.length <= 1) { |
||||
inputs = null; |
||||
} else { |
||||
inputs = new String[args.length -1]; |
||||
System.arraycopy(args, 1, inputs, 0, inputs.length); |
||||
} |
||||
} |
||||
|
||||
private void println(final String prefix, final byte[] digest) { |
||||
println(prefix, digest, null); |
||||
} |
||||
|
||||
private void println(final String prefix, final byte[] digest, final String fileName) { |
||||
// The standard appears to be to print
|
||||
// hex, space, then either space or '*' followed by filename
|
||||
// where '*' is used for binary files
|
||||
// shasum(1) has a -b option which generates " *" separator
|
||||
// we don't distinguish binary files at present
|
||||
System.out.println(prefix + Hex.encodeHexString(digest) + (fileName != null ? " " + fileName : "")); |
||||
} |
||||
|
||||
private void run() throws IOException { |
||||
if (algorithm.equalsIgnoreCase("ALL") || algorithm.equals("*")) { |
||||
run(MessageDigestAlgorithms.values()); |
||||
return; |
||||
} |
||||
final MessageDigest messageDigest = DigestUtils.getDigest(algorithm, null); |
||||
if (messageDigest != null) { |
||||
run("", messageDigest); |
||||
} else { |
||||
run("", DigestUtils.getDigest(algorithm.toUpperCase(Locale.ROOT))); |
||||
} |
||||
} |
||||
|
||||
private void run(final String[] digestAlgorithms) throws IOException { |
||||
for (final String messageDigestAlgorithm : digestAlgorithms) { |
||||
if (DigestUtils.isAvailable(messageDigestAlgorithm)) { |
||||
run(messageDigestAlgorithm + " ", messageDigestAlgorithm); |
||||
} |
||||
} |
||||
} |
||||
|
||||
private void run(final String prefix, final MessageDigest messageDigest) throws IOException { |
||||
if (inputs == null) { |
||||
println(prefix, DigestUtils.digest(messageDigest, System.in)); |
||||
return; |
||||
} |
||||
for(final String source : inputs) { |
||||
final File file = new File(source); |
||||
if (file.isFile()) { |
||||
println(prefix, DigestUtils.digest(messageDigest, file), source); |
||||
} else if (file.isDirectory()) { |
||||
final File[] listFiles = file.listFiles(); |
||||
if (listFiles != null) { |
||||
run(prefix, messageDigest, listFiles); |
||||
} |
||||
} else { |
||||
// use the default charset for the command-line parameter
|
||||
final byte[] bytes = source.getBytes(Charset.defaultCharset()); |
||||
println(prefix, DigestUtils.digest(messageDigest, bytes)); |
||||
} |
||||
} |
||||
} |
||||
|
||||
private void run(final String prefix, final MessageDigest messageDigest, final File[] files) throws IOException { |
||||
for (final File file : files) { |
||||
if (file.isFile()) { |
||||
println(prefix, DigestUtils.digest(messageDigest, file), file.getName()); |
||||
} |
||||
} |
||||
} |
||||
|
||||
private void run(final String prefix, final String messageDigestAlgorithm) throws IOException { |
||||
run(prefix, DigestUtils.getDigest(messageDigestAlgorithm)); |
||||
} |
||||
|
||||
@Override |
||||
public String toString() { |
||||
return String.format("%s %s", super.toString(), Arrays.toString(args)); |
||||
} |
||||
} |
@ -0,0 +1,79 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
package com.fr.third.org.apache.commons.codec.digest; |
||||
|
||||
import java.util.Random; |
||||
|
||||
/** |
||||
* Base64 like method to convert binary bytes into ASCII chars. |
||||
* |
||||
* TODO: Can Base64 be reused? |
||||
* |
||||
* <p> |
||||
* This class is immutable and thread-safe. |
||||
* </p> |
||||
* |
||||
* @version $Id: B64.java 1435550 2013-01-19 14:09:52Z tn $ |
||||
* @since 1.7 |
||||
*/ |
||||
class B64 {

    /**
     * Table with characters for Base64 transformation.
     */
    static final String B64T = "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

    /**
     * Single cryptographically strong generator used for all salts. Salts feed password hashing, so a
     * predictable {@code java.util.Random} (and a fresh instance per character) is not appropriate.
     * Written fully qualified so that the file's import list does not need to change.
     */
    private static final java.security.SecureRandom SALT_RANDOM = new java.security.SecureRandom();

    /**
     * Base64 like conversion of bytes to ASCII chars.
     * <p>
     * The three bytes are packed into one 24-bit word ({@code b2} highest, {@code b0} lowest) and emitted
     * six bits at a time, starting with the lowest six bits.
     * </p>
     *
     * @param b2
     *            A byte from the result.
     * @param b1
     *            A byte from the result.
     * @param b0
     *            A byte from the result.
     * @param outLen
     *            The number of expected output chars.
     * @param buffer
     *            Where the output chars is appended to.
     */
    static void b64from24bit(final byte b2, final byte b1, final byte b0, final int outLen,
            final StringBuilder buffer) {
        // The bit masking is necessary because the JVM byte type is signed!
        int w = ((b2 << 16) & 0x00ffffff) | ((b1 << 8) & 0x00ffff) | (b0 & 0xff);
        // It's effectively a "for" loop but kept to resemble the original C code.
        int n = outLen;
        while (n-- > 0) {
            buffer.append(B64T.charAt(w & 0x3f));
            w >>= 6;
        }
    }

    /**
     * Generates a string of random chars from the B64T set.
     *
     * @param num
     *            Number of chars to generate; zero or a negative value yields an empty string.
     * @return a string of {@code num} characters, each drawn from {@link #B64T}
     */
    static String getRandomSalt(final int num) {
        final StringBuilder saltString = new StringBuilder(Math.max(num, 0));
        for (int i = 1; i <= num; i++) {
            saltString.append(B64T.charAt(SALT_RANDOM.nextInt(B64T.length())));
        }
        return saltString.toString();
    }
}
@ -0,0 +1,153 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
package com.fr.third.org.apache.commons.codec.digest; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.Charsets; |
||||
|
||||
/** |
||||
* GNU libc crypt(3) compatible hash method. |
||||
* <p> |
||||
* See {@link #crypt(String, String)} for further details. |
||||
* <p> |
||||
* This class is immutable and thread-safe. |
||||
* |
||||
* @version $Id: Crypt.java 1744646 2016-05-20 00:11:45Z sebb $ |
||||
* @since 1.7 |
||||
*/ |
||||
public class Crypt { |
||||
|
||||
/** |
||||
* Encrypts a password in a crypt(3) compatible way. |
||||
* <p> |
||||
* A random salt and the default algorithm (currently SHA-512) are used. See {@link #crypt(String, String)} for |
||||
* details. |
||||
* |
||||
* @param keyBytes |
||||
* plaintext password |
||||
* @return hash value |
||||
* @throws RuntimeException |
||||
* when a {@link java.security.NoSuchAlgorithmException} is caught. |
||||
*/ |
||||
public static String crypt(final byte[] keyBytes) { |
||||
return crypt(keyBytes, null); |
||||
} |
||||
|
||||
/** |
||||
* Encrypts a password in a crypt(3) compatible way. |
||||
* <p> |
||||
* If no salt is provided, a random salt and the default algorithm (currently SHA-512) will be used. See |
||||
* {@link #crypt(String, String)} for details. |
||||
* |
||||
* @param keyBytes |
||||
* plaintext password |
||||
* @param salt |
||||
* salt value |
||||
* @return hash value |
||||
* @throws IllegalArgumentException |
||||
* if the salt does not match the allowed pattern |
||||
* @throws RuntimeException |
||||
* when a {@link java.security.NoSuchAlgorithmException} is caught. |
||||
*/ |
||||
public static String crypt(final byte[] keyBytes, final String salt) { |
||||
if (salt == null) { |
||||
return Sha2Crypt.sha512Crypt(keyBytes); |
||||
} else if (salt.startsWith(Sha2Crypt.SHA512_PREFIX)) { |
||||
return Sha2Crypt.sha512Crypt(keyBytes, salt); |
||||
} else if (salt.startsWith(Sha2Crypt.SHA256_PREFIX)) { |
||||
return Sha2Crypt.sha256Crypt(keyBytes, salt); |
||||
} else if (salt.startsWith(Md5Crypt.MD5_PREFIX)) { |
||||
return Md5Crypt.md5Crypt(keyBytes, salt); |
||||
} else { |
||||
return UnixCrypt.crypt(keyBytes, salt); |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Calculates the digest using the strongest crypt(3) algorithm. |
||||
* <p> |
||||
* A random salt and the default algorithm (currently SHA-512) are used. |
||||
* |
||||
* @see #crypt(String, String) |
||||
* @param key |
||||
* plaintext password |
||||
* @return hash value |
||||
* @throws RuntimeException |
||||
* when a {@link java.security.NoSuchAlgorithmException} is caught. |
||||
*/ |
||||
public static String crypt(final String key) { |
||||
return crypt(key, null); |
||||
} |
||||
|
||||
/** |
||||
* Encrypts a password in a crypt(3) compatible way. |
||||
* <p> |
||||
* The exact algorithm depends on the format of the salt string: |
||||
* <ul> |
||||
* <li>SHA-512 salts start with {@code $6$} and are up to 16 chars long. |
||||
* <li>SHA-256 salts start with {@code $5$} and are up to 16 chars long |
||||
* <li>MD5 salts start with {@code $1$} and are up to 8 chars long |
||||
* <li>DES, the traditional UnixCrypt algorithm is used with only 2 chars |
||||
* <li>Only the first 8 chars of the passwords are used in the DES algorithm! |
||||
* </ul> |
||||
* The magic strings {@code "$apr1$"} and {@code "$2a$"} are not recognized by this method as its output should be |
||||
* identical with that of the libc implementation. |
||||
* <p> |
||||
* The rest of the salt string is drawn from the set {@code [a-zA-Z0-9./]} and is cut at the maximum length of if a |
||||
* {@code "$"} sign is encountered. It is therefore valid to enter a complete hash value as salt to e.g. verify a |
||||
* password with: |
||||
* |
||||
* <pre> |
||||
* storedPwd.equals(crypt(enteredPwd, storedPwd)) |
||||
* </pre> |
||||
* <p> |
||||
* The resulting string starts with the marker string ({@code $n$}), where n is the same as the input salt. |
||||
* The salt is then appended, followed by a {@code "$"} sign. |
||||
* This is followed by the actual hash value. |
||||
* For DES the string only contains the salt and actual hash. |
||||
* The total length is dependent on the algorithm used: |
||||
* <ul> |
||||
* <li>SHA-512: 106 chars |
||||
* <li>SHA-256: 63 chars |
||||
* <li>MD5: 34 chars |
||||
* <li>DES: 13 chars |
||||
* </ul> |
||||
* <p> |
||||
* Example: |
||||
* |
||||
* <pre> |
||||
* crypt("secret", "$1$xxxx") => "$1$xxxx$aMkevjfEIpa35Bh3G4bAc." |
||||
* crypt("secret", "xx") => "xxWAum7tHdIUw" |
||||
* </pre> |
||||
* <p> |
||||
* This method comes in a variation that accepts a byte[] array to support input strings that are not encoded in |
||||
* UTF-8 but e.g. in ISO-8859-1 where equal characters result in different byte values. |
||||
* |
||||
* @see "The man page of the libc crypt (3) function." |
||||
* @param key |
||||
* plaintext password as entered by the used |
||||
* @param salt |
||||
* salt value |
||||
* @return hash value, i.e. encrypted password including the salt string |
||||
* @throws IllegalArgumentException |
||||
* if the salt does not match the allowed pattern |
||||
* @throws RuntimeException |
||||
* when a {@link java.security.NoSuchAlgorithmException} is caught. * |
||||
*/ |
||||
public static String crypt(final String key, final String salt) { |
||||
return crypt(key.getBytes(Charsets.UTF_8), salt); |
||||
} |
||||
} |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,126 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.digest; |
||||
|
||||
/** |
||||
* Standard {@link HmacUtils} algorithm names from the <cite>Java Cryptography Architecture Standard Algorithm Name |
||||
* Documentation</cite>. |
||||
* |
||||
* <p> |
||||
* <strong>Note: Not all JCE implementations support all the algorithms in this enum.</strong> |
||||
* </p> |
||||
* |
||||
* @see <a href="http://docs.oracle.com/javase/6/docs/technotes/guides/security/SunProviders.html#SunJCEProvider"> Java |
||||
* 6 Cryptography Architecture Sun Providers Documentation</a> |
||||
* @see <a href="http://docs.oracle.com/javase/7/docs/technotes/guides/security/SunProviders.html#SunJCEProvider"> Java |
||||
* 7 Cryptography Architecture Sun Providers Documentation</a> |
||||
* @see <a href="http://docs.oracle.com/javase/8/docs/technotes/guides/security/SunProviders.html#SunJCEProvider"> Java |
||||
* 8 Cryptography Architecture Sun Providers Documentation</a> |
||||
* @see <a href= |
||||
* "http://docs.oracle.com/javase/9/security/oracleproviders.htm#JSSEC-GUID-A47B1249-593C-4C38-A0D0-68FA7681E0A7"> |
||||
* Java 9 Cryptography Architecture Sun Providers Documentation</a> |
||||
* @since 1.10 |
||||
* @version $Id: HmacAlgorithms.java 1811624 2017-10-09 23:07:49Z ggregory $ |
||||
*/ |
||||
public enum HmacAlgorithms {

    /**
     * HmacMD5, the Message Authentication Code (MAC) algorithm specified in RFC 2104 and RFC 1321.
     * <p>
     * Every implementation of the Java platform is required to support this standard MAC algorithm.
     * </p>
     */
    HMAC_MD5("HmacMD5"),

    /**
     * HmacSHA1, the Message Authentication Code (MAC) algorithm specified in RFC 2104 and FIPS PUB 180-2.
     * <p>
     * Every implementation of the Java platform is required to support this standard MAC algorithm.
     * </p>
     */
    HMAC_SHA_1("HmacSHA1"),

    /**
     * HmacSHA224, the Message Authentication Code (MAC) algorithm specified in RFC 2104 and FIPS PUB 180-2.
     * <p>
     * Every implementation of the Java 8+ platform is required to support this standard MAC algorithm.
     * </p>
     * @since 1.11
     */
    HMAC_SHA_224("HmacSHA224"),

    /**
     * HmacSHA256, the Message Authentication Code (MAC) algorithm specified in RFC 2104 and FIPS PUB 180-2.
     * <p>
     * Every implementation of the Java platform is required to support this standard MAC algorithm.
     * </p>
     */
    HMAC_SHA_256("HmacSHA256"),

    /**
     * HmacSHA384, the Message Authentication Code (MAC) algorithm specified in RFC 2104 and FIPS PUB 180-2.
     * <p>
     * This MAC algorithm is <em>optional</em>; not all implementations support it.
     * </p>
     */
    HMAC_SHA_384("HmacSHA384"),

    /**
     * HmacSHA512, the Message Authentication Code (MAC) algorithm specified in RFC 2104 and FIPS PUB 180-2.
     * <p>
     * This MAC algorithm is <em>optional</em>; not all implementations support it.
     * </p>
     */
    HMAC_SHA_512("HmacSHA512");

    /** The algorithm name string bound to this constant. */
    private final String algorithmName;

    /**
     * Binds a constant to its algorithm name.
     *
     * @param algorithmName
     *            the algorithm name reported by {@link #getName()} and {@link #toString()}
     */
    HmacAlgorithms(final String algorithmName) {
        this.algorithmName = algorithmName;
    }

    /**
     * Gets the algorithm name.
     *
     * @return the algorithm name.
     * @since 1.11
     */
    public String getName() {
        return algorithmName;
    }

    /**
     * Returns the algorithm name, i.e. the same value as {@link #getName()}.
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/technotes/guides/security/SunProviders.html#SunJCEProvider">
     *      Java 6 Cryptography Architecture Sun Providers Documentation</a>
     * @see <a href="http://docs.oracle.com/javase/7/docs/technotes/guides/security/SunProviders.html#SunJCEProvider">
     *      Java 7 Cryptography Architecture Sun Providers Documentation</a>
     * @see <a href="http://docs.oracle.com/javase/8/docs/technotes/guides/security/SunProviders.html#SunJCEProvider">
     *      Java 8 Cryptography Architecture Sun Providers Documentation</a>
     * @see <a href=
     *      "http://docs.oracle.com/javase/9/security/oracleproviders.htm#JSSEC-GUID-A47B1249-593C-4C38-A0D0-68FA7681E0A7">
     *      Java 9 Cryptography Architecture Sun Providers Documentation</a>
     * @return The algorithm name ("HmacSHA512" for example)
     */
    @Override
    public String toString() {
        return getName();
    }

}
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,302 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
package com.fr.third.org.apache.commons.codec.digest; |
||||
|
||||
import java.security.MessageDigest; |
||||
import java.util.Arrays; |
||||
import java.util.regex.Matcher; |
||||
import java.util.regex.Pattern; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.Charsets; |
||||
|
||||
/** |
||||
* The libc crypt() "$1$" and Apache "$apr1$" MD5-based hash algorithm. |
||||
* <p> |
||||
* Based on the public domain ("beer-ware") C implementation from Poul-Henning Kamp which was found at: <a |
||||
* href="http://www.freebsd.org/cgi/cvsweb.cgi/src/lib/libcrypt/crypt-md5.c?rev=1.1;content-type=text%2Fplain"> |
||||
* crypt-md5.c @ freebsd.org</a><br> |
||||
* <p> |
||||
* Source: |
||||
* |
||||
* <pre> |
||||
* $FreeBSD: src/lib/libcrypt/crypt-md5.c,v 1.1 1999/01/21 13:50:09 brandon Exp $ |
||||
* </pre> |
||||
* <p> |
||||
* Conversion to Kotlin and from there to Java in 2012. |
||||
* <p> |
||||
* The C style comments are from the original C code, the ones with "//" from the port. |
||||
* <p> |
||||
* This class is immutable and thread-safe. |
||||
* |
||||
* @version $Id: Md5Crypt.java 1744746 2016-05-20 14:19:43Z sebb $ |
||||
* @since 1.7 |
||||
*/ |
||||
public class Md5Crypt { |
||||
|
||||
/** The Identifier of the Apache variant. */ |
||||
static final String APR1_PREFIX = "$apr1$"; |
||||
|
||||
/** The number of bytes of the final hash. */ |
||||
private static final int BLOCKSIZE = 16; |
||||
|
||||
/** The Identifier of this crypt() variant. */ |
||||
static final String MD5_PREFIX = "$1$"; |
||||
|
||||
/** The number of rounds of the big loop. */ |
||||
private static final int ROUNDS = 1000; |
||||
|
||||
/** |
||||
* See {@link #apr1Crypt(String, String)} for details. |
||||
* |
||||
* @param keyBytes |
||||
* plaintext string to hash. |
||||
* @return the hash value |
||||
* @throws RuntimeException |
||||
* when a {@link java.security.NoSuchAlgorithmException} is caught. |
||||
*/ |
||||
public static String apr1Crypt(final byte[] keyBytes) { |
||||
return apr1Crypt(keyBytes, APR1_PREFIX + B64.getRandomSalt(8)); |
||||
} |
||||
|
||||
/** |
||||
* See {@link #apr1Crypt(String, String)} for details. |
||||
* |
||||
* @param keyBytes |
||||
* plaintext string to hash. |
||||
* @param salt An APR1 salt. |
||||
* @return the hash value |
||||
* @throws IllegalArgumentException |
||||
* if the salt does not match the allowed pattern |
||||
* @throws RuntimeException |
||||
* when a {@link java.security.NoSuchAlgorithmException} is caught. |
||||
*/ |
||||
public static String apr1Crypt(final byte[] keyBytes, String salt) { |
||||
// to make the md5Crypt regex happy
|
||||
if (salt != null && !salt.startsWith(APR1_PREFIX)) { |
||||
salt = APR1_PREFIX + salt; |
||||
} |
||||
return Md5Crypt.md5Crypt(keyBytes, salt, APR1_PREFIX); |
||||
} |
||||
|
||||
/** |
||||
* See {@link #apr1Crypt(String, String)} for details. |
||||
* |
||||
* @param keyBytes |
||||
* plaintext string to hash. |
||||
* @return the hash value |
||||
* @throws RuntimeException |
||||
* when a {@link java.security.NoSuchAlgorithmException} is caught. |
||||
*/ |
||||
public static String apr1Crypt(final String keyBytes) { |
||||
return apr1Crypt(keyBytes.getBytes(Charsets.UTF_8)); |
||||
} |
||||
|
||||
/** |
||||
* Generates an Apache htpasswd compatible "$apr1$" MD5 based hash value. |
||||
* <p> |
||||
* The algorithm is identical to the crypt(3) "$1$" one but produces different outputs due to the different salt |
||||
* prefix. |
||||
* |
||||
* @param keyBytes |
||||
* plaintext string to hash. |
||||
* @param salt |
||||
* salt string including the prefix and optionally garbage at the end. Will be generated randomly if |
||||
* null. |
||||
* @return the hash value |
||||
* @throws IllegalArgumentException |
||||
* if the salt does not match the allowed pattern |
||||
* @throws RuntimeException |
||||
* when a {@link java.security.NoSuchAlgorithmException} is caught. |
||||
*/ |
||||
public static String apr1Crypt(final String keyBytes, final String salt) { |
||||
return apr1Crypt(keyBytes.getBytes(Charsets.UTF_8), salt); |
||||
} |
||||
|
||||
/** |
||||
* Generates a libc6 crypt() compatible "$1$" hash value. |
||||
* <p> |
||||
* See {@link Crypt#crypt(String, String)} for details. |
||||
* |
||||
* @param keyBytes |
||||
* plaintext string to hash. |
||||
* @return the hash value |
||||
* @throws RuntimeException |
||||
* when a {@link java.security.NoSuchAlgorithmException} is caught. |
||||
*/ |
||||
public static String md5Crypt(final byte[] keyBytes) { |
||||
return md5Crypt(keyBytes, MD5_PREFIX + B64.getRandomSalt(8)); |
||||
} |
||||
|
||||
/** |
||||
* Generates a libc crypt() compatible "$1$" MD5 based hash value. |
||||
* <p> |
||||
* See {@link Crypt#crypt(String, String)} for details. |
||||
* |
||||
* @param keyBytes |
||||
* plaintext string to hash. |
||||
* @param salt |
||||
* salt string including the prefix and optionally garbage at the end. Will be generated randomly if |
||||
* null. |
||||
* @return the hash value |
||||
* @throws IllegalArgumentException |
||||
* if the salt does not match the allowed pattern |
||||
* @throws RuntimeException |
||||
* when a {@link java.security.NoSuchAlgorithmException} is caught. |
||||
*/ |
||||
public static String md5Crypt(final byte[] keyBytes, final String salt) { |
||||
return md5Crypt(keyBytes, salt, MD5_PREFIX); |
||||
} |
||||
|
||||
/** |
||||
* Generates a libc6 crypt() "$1$" or Apache htpasswd "$apr1$" hash value. |
||||
* <p> |
||||
* See {@link Crypt#crypt(String, String)} or {@link #apr1Crypt(String, String)} for details. |
||||
* |
||||
* @param keyBytes |
||||
* plaintext string to hash. |
||||
* @param salt May be null. |
||||
* @param prefix salt prefix |
||||
* @return the hash value |
||||
* @throws IllegalArgumentException |
||||
* if the salt does not match the allowed pattern |
||||
* @throws RuntimeException |
||||
* when a {@link java.security.NoSuchAlgorithmException} is caught. |
||||
*/ |
||||
public static String md5Crypt(final byte[] keyBytes, final String salt, final String prefix) { |
||||
final int keyLen = keyBytes.length; |
||||
|
||||
// Extract the real salt from the given string which can be a complete hash string.
|
||||
String saltString; |
||||
if (salt == null) { |
||||
saltString = B64.getRandomSalt(8); |
||||
} else { |
||||
final Pattern p = Pattern.compile("^" + prefix.replace("$", "\\$") + "([\\.\\/a-zA-Z0-9]{1,8}).*"); |
||||
final Matcher m = p.matcher(salt); |
||||
if (!m.find()) { |
||||
throw new IllegalArgumentException("Invalid salt value: " + salt); |
||||
} |
||||
saltString = m.group(1); |
||||
} |
||||
final byte[] saltBytes = saltString.getBytes(Charsets.UTF_8); |
||||
|
||||
final MessageDigest ctx = DigestUtils.getMd5Digest(); |
||||
|
||||
/* |
||||
* The password first, since that is what is most unknown |
||||
*/ |
||||
ctx.update(keyBytes); |
||||
|
||||
/* |
||||
* Then our magic string |
||||
*/ |
||||
ctx.update(prefix.getBytes(Charsets.UTF_8)); |
||||
|
||||
/* |
||||
* Then the raw salt |
||||
*/ |
||||
ctx.update(saltBytes); |
||||
|
||||
/* |
||||
* Then just as many characters of the MD5(pw,salt,pw) |
||||
*/ |
||||
MessageDigest ctx1 = DigestUtils.getMd5Digest(); |
||||
ctx1.update(keyBytes); |
||||
ctx1.update(saltBytes); |
||||
ctx1.update(keyBytes); |
||||
byte[] finalb = ctx1.digest(); |
||||
int ii = keyLen; |
||||
while (ii > 0) { |
||||
ctx.update(finalb, 0, ii > 16 ? 16 : ii); |
||||
ii -= 16; |
||||
} |
||||
|
||||
/* |
||||
* Don't leave anything around in vm they could use. |
||||
*/ |
||||
Arrays.fill(finalb, (byte) 0); |
||||
|
||||
/* |
||||
* Then something really weird... |
||||
*/ |
||||
ii = keyLen; |
||||
final int j = 0; |
||||
while (ii > 0) { |
||||
if ((ii & 1) == 1) { |
||||
ctx.update(finalb[j]); |
||||
} else { |
||||
ctx.update(keyBytes[j]); |
||||
} |
||||
ii >>= 1; |
||||
} |
||||
|
||||
/* |
||||
* Now make the output string |
||||
*/ |
||||
final StringBuilder passwd = new StringBuilder(prefix + saltString + "$"); |
||||
finalb = ctx.digest(); |
||||
|
||||
/* |
||||
* and now, just to make sure things don't run too fast On a 60 Mhz Pentium this takes 34 msec, so you would |
||||
* need 30 seconds to build a 1000 entry dictionary... |
||||
*/ |
||||
for (int i = 0; i < ROUNDS; i++) { |
||||
ctx1 = DigestUtils.getMd5Digest(); |
||||
if ((i & 1) != 0) { |
||||
ctx1.update(keyBytes); |
||||
} else { |
||||
ctx1.update(finalb, 0, BLOCKSIZE); |
||||
} |
||||
|
||||
if (i % 3 != 0) { |
||||
ctx1.update(saltBytes); |
||||
} |
||||
|
||||
if (i % 7 != 0) { |
||||
ctx1.update(keyBytes); |
||||
} |
||||
|
||||
if ((i & 1) != 0) { |
||||
ctx1.update(finalb, 0, BLOCKSIZE); |
||||
} else { |
||||
ctx1.update(keyBytes); |
||||
} |
||||
finalb = ctx1.digest(); |
||||
} |
||||
|
||||
// The following was nearly identical to the Sha2Crypt code.
|
||||
// Again, the buflen is not really needed.
|
||||
// int buflen = MD5_PREFIX.length() - 1 + salt_string.length() + 1 + BLOCKSIZE + 1;
|
||||
B64.b64from24bit(finalb[0], finalb[6], finalb[12], 4, passwd); |
||||
B64.b64from24bit(finalb[1], finalb[7], finalb[13], 4, passwd); |
||||
B64.b64from24bit(finalb[2], finalb[8], finalb[14], 4, passwd); |
||||
B64.b64from24bit(finalb[3], finalb[9], finalb[15], 4, passwd); |
||||
B64.b64from24bit(finalb[4], finalb[10], finalb[5], 4, passwd); |
||||
B64.b64from24bit((byte) 0, (byte) 0, finalb[11], 2, passwd); |
||||
|
||||
/* |
||||
* Don't leave anything around in vm they could use. |
||||
*/ |
||||
// Is there a better way to do this with the JVM?
|
||||
ctx.reset(); |
||||
ctx1.reset(); |
||||
Arrays.fill(keyBytes, (byte) 0); |
||||
Arrays.fill(saltBytes, (byte) 0); |
||||
Arrays.fill(finalb, (byte) 0); |
||||
|
||||
return passwd.toString(); |
||||
} |
||||
} |
@ -0,0 +1,148 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.digest; |
||||
|
||||
import java.security.MessageDigest; |
||||
|
||||
/** |
||||
* Standard {@link MessageDigest} algorithm names from the <cite>Java Cryptography Architecture Standard Algorithm Name |
||||
* Documentation</cite>. |
||||
* <p> |
||||
* This class is immutable and thread-safe. |
||||
* </p> |
||||
* <p> |
||||
* Java 8 and up: SHA-224. |
||||
* </p> |
||||
* <p> |
||||
* Java 9 and up: SHA3-224, SHA3-256, SHA3-384, SHA3-512. |
||||
* </p> |
||||
* |
||||
* @see <a href="http://docs.oracle.com/javase/6/docs/technotes/guides/security/StandardNames.html#MessageDigest"> |
||||
* Java 6 Cryptography Architecture Standard Algorithm Name Documentation</a> |
||||
* @see <a href="http://docs.oracle.com/javase/7/docs/technotes/guides/security/StandardNames.html#MessageDigest"> |
||||
* Java 7 Cryptography Architecture Standard Algorithm Name Documentation</a> |
||||
* @see <a href="http://docs.oracle.com/javase/8/docs/technotes/guides/security/StandardNames.html#MessageDigest"> |
||||
* Java 8 Cryptography Architecture Standard Algorithm Name Documentation</a> |
||||
* @see <a href="http://download.java.net/java/jdk9/docs/technotes/guides/security/StandardNames.html#MessageDigest"> |
||||
* Java 9 Cryptography Architecture Standard Algorithm Name Documentation</a> |
||||
* |
||||
* @see <a href="http://dx.doi.org/10.6028/NIST.FIPS.180-4">FIPS PUB 180-4</a> |
||||
* @see <a href="http://dx.doi.org/10.6028/NIST.FIPS.202">FIPS PUB 202</a> |
||||
* @since 1.7 |
||||
* @version $Id: MessageDigestAlgorithms.java 1744728 2016-05-20 12:55:58Z sebb $ |
||||
*/ |
||||
public class MessageDigestAlgorithms {

    /** Name of the MD2 message digest algorithm (RFC 1319). */
    public static final String MD2 = "MD2";

    /** Name of the MD5 message digest algorithm (RFC 1321). */
    public static final String MD5 = "MD5";

    /** Name of the SHA-1 hash algorithm (FIPS PUB 180-2). */
    public static final String SHA_1 = "SHA-1";

    /**
     * Name of the SHA-224 hash algorithm (FIPS PUB 180-3).
     * <p>
     * Present in Oracle Java 8.
     * </p>
     *
     * @since 1.11
     */
    public static final String SHA_224 = "SHA-224";

    /** Name of the SHA-256 hash algorithm (FIPS PUB 180-2). */
    public static final String SHA_256 = "SHA-256";

    /** Name of the SHA-384 hash algorithm (FIPS PUB 180-2). */
    public static final String SHA_384 = "SHA-384";

    /** Name of the SHA-512 hash algorithm (FIPS PUB 180-2). */
    public static final String SHA_512 = "SHA-512";

    /**
     * Name of the SHA3-224 hash algorithm (FIPS PUB 202).
     * <p>
     * Likely to be included in Oracle Java 9 GA.
     * </p>
     *
     * @since 1.11
     */
    public static final String SHA3_224 = "SHA3-224";

    /**
     * Name of the SHA3-256 hash algorithm (FIPS PUB 202).
     * <p>
     * Likely to be included in Oracle Java 9 GA.
     * </p>
     *
     * @since 1.11
     */
    public static final String SHA3_256 = "SHA3-256";

    /**
     * Name of the SHA3-384 hash algorithm (FIPS PUB 202).
     * <p>
     * Likely to be included in Oracle Java 9 GA.
     * </p>
     *
     * @since 1.11
     */
    public static final String SHA3_384 = "SHA3-384";

    /**
     * Name of the SHA3-512 hash algorithm (FIPS PUB 202).
     * <p>
     * Likely to be included in Oracle Java 9 GA.
     * </p>
     *
     * @since 1.11
     */
    public static final String SHA3_512 = "SHA3-512";

    /** Not instantiable: this class only holds constants and a static accessor. */
    private MessageDigestAlgorithms() {
        // cannot be instantiated.
    }

    /**
     * Lists every algorithm name declared as a constant in this class.
     *
     * @return a newly allocated array holding all constant values defined in this class.
     * @since 1.11
     */
    public static String[] values() {
        // N.B. a fresh array is created per call on purpose: a shared constant array could be modified by callers,
        // by accident or by design.
        final String[] names = {
            MD2, MD5, SHA_1, SHA_224, SHA_256, SHA_384, SHA_512, SHA3_224, SHA3_256, SHA3_384, SHA3_512
        };
        return names;
    }

}
@ -0,0 +1,632 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
package com.fr.third.org.apache.commons.codec.digest; |
||||
|
||||
import java.util.zip.Checksum; |
||||
|
||||
/** |
||||
* A pure-java implementation of the CRC32 checksum that uses |
||||
* the same polynomial as the built-in native CRC32. |
||||
* |
||||
* This is to avoid the JNI overhead for certain uses of Checksumming |
||||
* where many small pieces of data are checksummed in succession. |
||||
* |
||||
* The current version is ~10x to 1.8x as fast as Sun's native |
||||
* java.util.zip.CRC32 in Java 1.6 |
||||
* |
||||
* Copied from Hadoop 2.6.3. |
||||
* The code agrees with the following file in the 2.6.3 tag: |
||||
* https://git-wip-us.apache.org/repos/asf?p=hadoop.git;a=blob_plain;
|
||||
* f=hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PureJavaCrc32.java; |
||||
* hb=2120de588b92b9f22b1cc4188761d6a8c61aa778 |
||||
* <p> |
||||
* This class is Not ThreadSafe |
||||
* |
||||
* @see java.util.zip.CRC32 |
||||
* @since 1.11 |
||||
*/ |
||||
public class PureJavaCrc32 implements Checksum { |
||||
|
||||
/** the current CRC value, bit-flipped */ |
||||
private int crc; |
||||
|
||||
/** Create a new PureJavaCrc32 object. */ |
||||
public PureJavaCrc32() {
    // Start from the initial CRC state; goes through the private helper rather than the public reset().
    this._reset();
}
||||
|
||||
@Override |
||||
public long getValue() { |
||||
return (~crc) & 0xffffffffL; |
||||
} |
||||
|
||||
@Override |
||||
public void reset() { |
||||
_reset(); |
||||
} |
||||
|
||||
// called by ctor, so must not be overridable
|
||||
private void _reset() { |
||||
crc = 0xffffffff; |
||||
} |
||||
|
||||
@Override |
||||
public void update(final byte[] b, final int offset, final int len) { |
||||
int localCrc = crc; |
||||
|
||||
final int remainder = len & 0x7; |
||||
int i = offset; |
||||
for(final int end = offset + len - remainder; i < end; i += 8) { |
||||
final int x = localCrc ^ |
||||
((((b[i ] << 24) >>> 24) + ((b[i+1] << 24) >>> 16)) + |
||||
(((b[i+2] << 24) >>> 8 ) + (b[i+3] << 24))); |
||||
|
||||
localCrc = ((T[((x << 24) >>> 24) + 0x700] ^ T[((x << 16) >>> 24) + 0x600]) ^ |
||||
(T[((x << 8) >>> 24) + 0x500] ^ T[ (x >>> 24) + 0x400])) ^ |
||||
((T[((b[i+4] << 24) >>> 24) + 0x300] ^ T[((b[i+5] << 24) >>> 24) + 0x200]) ^ |
||||
(T[((b[i+6] << 24) >>> 24) + 0x100] ^ T[((b[i+7] << 24) >>> 24)])); |
||||
} |
||||
|
||||
/* loop unroll - duff's device style */ |
||||
switch(remainder) { |
||||
case 7: localCrc = (localCrc >>> 8) ^ T[((localCrc ^ b[i++]) << 24) >>> 24]; |
||||
case 6: localCrc = (localCrc >>> 8) ^ T[((localCrc ^ b[i++]) << 24) >>> 24]; |
||||
case 5: localCrc = (localCrc >>> 8) ^ T[((localCrc ^ b[i++]) << 24) >>> 24]; |
||||
case 4: localCrc = (localCrc >>> 8) ^ T[((localCrc ^ b[i++]) << 24) >>> 24]; |
||||
case 3: localCrc = (localCrc >>> 8) ^ T[((localCrc ^ b[i++]) << 24) >>> 24]; |
||||
case 2: localCrc = (localCrc >>> 8) ^ T[((localCrc ^ b[i++]) << 24) >>> 24]; |
||||
case 1: localCrc = (localCrc >>> 8) ^ T[((localCrc ^ b[i++]) << 24) >>> 24]; |
||||
default: |
||||
/* nothing */ |
||||
} |
||||
|
||||
// Publish crc out to object
|
||||
crc = localCrc; |
||||
} |
||||
|
||||
@Override |
||||
final public void update(final int b) { |
||||
crc = (crc >>> 8) ^ T[(((crc ^ b) << 24) >>> 24)]; |
||||
} |
||||
|
||||
/* |
||||
* CRC-32 lookup tables generated by the polynomial 0xEDB88320. |
||||
* See also TestPureJavaCrc32.Table. |
||||
*/ |
||||
private static final int[] T = new int[] { |
||||
/* T8_0 */ |
||||
0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, |
||||
0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, |
||||
0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, |
||||
0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, |
||||
0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, |
||||
0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, |
||||
0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, |
||||
0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, |
||||
0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, |
||||
0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, |
||||
0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, |
||||
0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, |
||||
0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, |
||||
0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, |
||||
0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, |
||||
0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, |
||||
0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, |
||||
0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, |
||||
0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, |
||||
0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, |
||||
0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, |
||||
0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, |
||||
0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, |
||||
0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, |
||||
0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, |
||||
0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, |
||||
0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, |
||||
0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, |
||||
0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, |
||||
0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, |
||||
0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, |
||||
0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, |
||||
0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, |
||||
0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, |
||||
0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, |
||||
0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, |
||||
0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, |
||||
0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, |
||||
0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, |
||||
0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, |
||||
0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, |
||||
0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, |
||||
0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, |
||||
0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, |
||||
0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, |
||||
0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, |
||||
0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, |
||||
0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, |
||||
0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, |
||||
0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, |
||||
0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, |
||||
0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, |
||||
0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, |
||||
0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, |
||||
0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, |
||||
0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, |
||||
0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, |
||||
0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, |
||||
0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, |
||||
0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, |
||||
0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, |
||||
0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, |
||||
0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, |
||||
0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D, |
||||
/* T8_1 */ |
||||
0x00000000, 0x191B3141, 0x32366282, 0x2B2D53C3, |
||||
0x646CC504, 0x7D77F445, 0x565AA786, 0x4F4196C7, |
||||
0xC8D98A08, 0xD1C2BB49, 0xFAEFE88A, 0xE3F4D9CB, |
||||
0xACB54F0C, 0xB5AE7E4D, 0x9E832D8E, 0x87981CCF, |
||||
0x4AC21251, 0x53D92310, 0x78F470D3, 0x61EF4192, |
||||
0x2EAED755, 0x37B5E614, 0x1C98B5D7, 0x05838496, |
||||
0x821B9859, 0x9B00A918, 0xB02DFADB, 0xA936CB9A, |
||||
0xE6775D5D, 0xFF6C6C1C, 0xD4413FDF, 0xCD5A0E9E, |
||||
0x958424A2, 0x8C9F15E3, 0xA7B24620, 0xBEA97761, |
||||
0xF1E8E1A6, 0xE8F3D0E7, 0xC3DE8324, 0xDAC5B265, |
||||
0x5D5DAEAA, 0x44469FEB, 0x6F6BCC28, 0x7670FD69, |
||||
0x39316BAE, 0x202A5AEF, 0x0B07092C, 0x121C386D, |
||||
0xDF4636F3, 0xC65D07B2, 0xED705471, 0xF46B6530, |
||||
0xBB2AF3F7, 0xA231C2B6, 0x891C9175, 0x9007A034, |
||||
0x179FBCFB, 0x0E848DBA, 0x25A9DE79, 0x3CB2EF38, |
||||
0x73F379FF, 0x6AE848BE, 0x41C51B7D, 0x58DE2A3C, |
||||
0xF0794F05, 0xE9627E44, 0xC24F2D87, 0xDB541CC6, |
||||
0x94158A01, 0x8D0EBB40, 0xA623E883, 0xBF38D9C2, |
||||
0x38A0C50D, 0x21BBF44C, 0x0A96A78F, 0x138D96CE, |
||||
0x5CCC0009, 0x45D73148, 0x6EFA628B, 0x77E153CA, |
||||
0xBABB5D54, 0xA3A06C15, 0x888D3FD6, 0x91960E97, |
||||
0xDED79850, 0xC7CCA911, 0xECE1FAD2, 0xF5FACB93, |
||||
0x7262D75C, 0x6B79E61D, 0x4054B5DE, 0x594F849F, |
||||
0x160E1258, 0x0F152319, 0x243870DA, 0x3D23419B, |
||||
0x65FD6BA7, 0x7CE65AE6, 0x57CB0925, 0x4ED03864, |
||||
0x0191AEA3, 0x188A9FE2, 0x33A7CC21, 0x2ABCFD60, |
||||
0xAD24E1AF, 0xB43FD0EE, 0x9F12832D, 0x8609B26C, |
||||
0xC94824AB, 0xD05315EA, 0xFB7E4629, 0xE2657768, |
||||
0x2F3F79F6, 0x362448B7, 0x1D091B74, 0x04122A35, |
||||
0x4B53BCF2, 0x52488DB3, 0x7965DE70, 0x607EEF31, |
||||
0xE7E6F3FE, 0xFEFDC2BF, 0xD5D0917C, 0xCCCBA03D, |
||||
0x838A36FA, 0x9A9107BB, 0xB1BC5478, 0xA8A76539, |
||||
0x3B83984B, 0x2298A90A, 0x09B5FAC9, 0x10AECB88, |
||||
0x5FEF5D4F, 0x46F46C0E, 0x6DD93FCD, 0x74C20E8C, |
||||
0xF35A1243, 0xEA412302, 0xC16C70C1, 0xD8774180, |
||||
0x9736D747, 0x8E2DE606, 0xA500B5C5, 0xBC1B8484, |
||||
0x71418A1A, 0x685ABB5B, 0x4377E898, 0x5A6CD9D9, |
||||
0x152D4F1E, 0x0C367E5F, 0x271B2D9C, 0x3E001CDD, |
||||
0xB9980012, 0xA0833153, 0x8BAE6290, 0x92B553D1, |
||||
0xDDF4C516, 0xC4EFF457, 0xEFC2A794, 0xF6D996D5, |
||||
0xAE07BCE9, 0xB71C8DA8, 0x9C31DE6B, 0x852AEF2A, |
||||
0xCA6B79ED, 0xD37048AC, 0xF85D1B6F, 0xE1462A2E, |
||||
0x66DE36E1, 0x7FC507A0, 0x54E85463, 0x4DF36522, |
||||
0x02B2F3E5, 0x1BA9C2A4, 0x30849167, 0x299FA026, |
||||
0xE4C5AEB8, 0xFDDE9FF9, 0xD6F3CC3A, 0xCFE8FD7B, |
||||
0x80A96BBC, 0x99B25AFD, 0xB29F093E, 0xAB84387F, |
||||
0x2C1C24B0, 0x350715F1, 0x1E2A4632, 0x07317773, |
||||
0x4870E1B4, 0x516BD0F5, 0x7A468336, 0x635DB277, |
||||
0xCBFAD74E, 0xD2E1E60F, 0xF9CCB5CC, 0xE0D7848D, |
||||
0xAF96124A, 0xB68D230B, 0x9DA070C8, 0x84BB4189, |
||||
0x03235D46, 0x1A386C07, 0x31153FC4, 0x280E0E85, |
||||
0x674F9842, 0x7E54A903, 0x5579FAC0, 0x4C62CB81, |
||||
0x8138C51F, 0x9823F45E, 0xB30EA79D, 0xAA1596DC, |
||||
0xE554001B, 0xFC4F315A, 0xD7626299, 0xCE7953D8, |
||||
0x49E14F17, 0x50FA7E56, 0x7BD72D95, 0x62CC1CD4, |
||||
0x2D8D8A13, 0x3496BB52, 0x1FBBE891, 0x06A0D9D0, |
||||
0x5E7EF3EC, 0x4765C2AD, 0x6C48916E, 0x7553A02F, |
||||
0x3A1236E8, 0x230907A9, 0x0824546A, 0x113F652B, |
||||
0x96A779E4, 0x8FBC48A5, 0xA4911B66, 0xBD8A2A27, |
||||
0xF2CBBCE0, 0xEBD08DA1, 0xC0FDDE62, 0xD9E6EF23, |
||||
0x14BCE1BD, 0x0DA7D0FC, 0x268A833F, 0x3F91B27E, |
||||
0x70D024B9, 0x69CB15F8, 0x42E6463B, 0x5BFD777A, |
||||
0xDC656BB5, 0xC57E5AF4, 0xEE530937, 0xF7483876, |
||||
0xB809AEB1, 0xA1129FF0, 0x8A3FCC33, 0x9324FD72, |
||||
/* T8_2 */ |
||||
0x00000000, 0x01C26A37, 0x0384D46E, 0x0246BE59, |
||||
0x0709A8DC, 0x06CBC2EB, 0x048D7CB2, 0x054F1685, |
||||
0x0E1351B8, 0x0FD13B8F, 0x0D9785D6, 0x0C55EFE1, |
||||
0x091AF964, 0x08D89353, 0x0A9E2D0A, 0x0B5C473D, |
||||
0x1C26A370, 0x1DE4C947, 0x1FA2771E, 0x1E601D29, |
||||
0x1B2F0BAC, 0x1AED619B, 0x18ABDFC2, 0x1969B5F5, |
||||
0x1235F2C8, 0x13F798FF, 0x11B126A6, 0x10734C91, |
||||
0x153C5A14, 0x14FE3023, 0x16B88E7A, 0x177AE44D, |
||||
0x384D46E0, 0x398F2CD7, 0x3BC9928E, 0x3A0BF8B9, |
||||
0x3F44EE3C, 0x3E86840B, 0x3CC03A52, 0x3D025065, |
||||
0x365E1758, 0x379C7D6F, 0x35DAC336, 0x3418A901, |
||||
0x3157BF84, 0x3095D5B3, 0x32D36BEA, 0x331101DD, |
||||
0x246BE590, 0x25A98FA7, 0x27EF31FE, 0x262D5BC9, |
||||
0x23624D4C, 0x22A0277B, 0x20E69922, 0x2124F315, |
||||
0x2A78B428, 0x2BBADE1F, 0x29FC6046, 0x283E0A71, |
||||
0x2D711CF4, 0x2CB376C3, 0x2EF5C89A, 0x2F37A2AD, |
||||
0x709A8DC0, 0x7158E7F7, 0x731E59AE, 0x72DC3399, |
||||
0x7793251C, 0x76514F2B, 0x7417F172, 0x75D59B45, |
||||
0x7E89DC78, 0x7F4BB64F, 0x7D0D0816, 0x7CCF6221, |
||||
0x798074A4, 0x78421E93, 0x7A04A0CA, 0x7BC6CAFD, |
||||
0x6CBC2EB0, 0x6D7E4487, 0x6F38FADE, 0x6EFA90E9, |
||||
0x6BB5866C, 0x6A77EC5B, 0x68315202, 0x69F33835, |
||||
0x62AF7F08, 0x636D153F, 0x612BAB66, 0x60E9C151, |
||||
0x65A6D7D4, 0x6464BDE3, 0x662203BA, 0x67E0698D, |
||||
0x48D7CB20, 0x4915A117, 0x4B531F4E, 0x4A917579, |
||||
0x4FDE63FC, 0x4E1C09CB, 0x4C5AB792, 0x4D98DDA5, |
||||
0x46C49A98, 0x4706F0AF, 0x45404EF6, 0x448224C1, |
||||
0x41CD3244, 0x400F5873, 0x4249E62A, 0x438B8C1D, |
||||
0x54F16850, 0x55330267, 0x5775BC3E, 0x56B7D609, |
||||
0x53F8C08C, 0x523AAABB, 0x507C14E2, 0x51BE7ED5, |
||||
0x5AE239E8, 0x5B2053DF, 0x5966ED86, 0x58A487B1, |
||||
0x5DEB9134, 0x5C29FB03, 0x5E6F455A, 0x5FAD2F6D, |
||||
0xE1351B80, 0xE0F771B7, 0xE2B1CFEE, 0xE373A5D9, |
||||
0xE63CB35C, 0xE7FED96B, 0xE5B86732, 0xE47A0D05, |
||||
0xEF264A38, 0xEEE4200F, 0xECA29E56, 0xED60F461, |
||||
0xE82FE2E4, 0xE9ED88D3, 0xEBAB368A, 0xEA695CBD, |
||||
0xFD13B8F0, 0xFCD1D2C7, 0xFE976C9E, 0xFF5506A9, |
||||
0xFA1A102C, 0xFBD87A1B, 0xF99EC442, 0xF85CAE75, |
||||
0xF300E948, 0xF2C2837F, 0xF0843D26, 0xF1465711, |
||||
0xF4094194, 0xF5CB2BA3, 0xF78D95FA, 0xF64FFFCD, |
||||
0xD9785D60, 0xD8BA3757, 0xDAFC890E, 0xDB3EE339, |
||||
0xDE71F5BC, 0xDFB39F8B, 0xDDF521D2, 0xDC374BE5, |
||||
0xD76B0CD8, 0xD6A966EF, 0xD4EFD8B6, 0xD52DB281, |
||||
0xD062A404, 0xD1A0CE33, 0xD3E6706A, 0xD2241A5D, |
||||
0xC55EFE10, 0xC49C9427, 0xC6DA2A7E, 0xC7184049, |
||||
0xC25756CC, 0xC3953CFB, 0xC1D382A2, 0xC011E895, |
||||
0xCB4DAFA8, 0xCA8FC59F, 0xC8C97BC6, 0xC90B11F1, |
||||
0xCC440774, 0xCD866D43, 0xCFC0D31A, 0xCE02B92D, |
||||
0x91AF9640, 0x906DFC77, 0x922B422E, 0x93E92819, |
||||
0x96A63E9C, 0x976454AB, 0x9522EAF2, 0x94E080C5, |
||||
0x9FBCC7F8, 0x9E7EADCF, 0x9C381396, 0x9DFA79A1, |
||||
0x98B56F24, 0x99770513, 0x9B31BB4A, 0x9AF3D17D, |
||||
0x8D893530, 0x8C4B5F07, 0x8E0DE15E, 0x8FCF8B69, |
||||
0x8A809DEC, 0x8B42F7DB, 0x89044982, 0x88C623B5, |
||||
0x839A6488, 0x82580EBF, 0x801EB0E6, 0x81DCDAD1, |
||||
0x8493CC54, 0x8551A663, 0x8717183A, 0x86D5720D, |
||||
0xA9E2D0A0, 0xA820BA97, 0xAA6604CE, 0xABA46EF9, |
||||
0xAEEB787C, 0xAF29124B, 0xAD6FAC12, 0xACADC625, |
||||
0xA7F18118, 0xA633EB2F, 0xA4755576, 0xA5B73F41, |
||||
0xA0F829C4, 0xA13A43F3, 0xA37CFDAA, 0xA2BE979D, |
||||
0xB5C473D0, 0xB40619E7, 0xB640A7BE, 0xB782CD89, |
||||
0xB2CDDB0C, 0xB30FB13B, 0xB1490F62, 0xB08B6555, |
||||
0xBBD72268, 0xBA15485F, 0xB853F606, 0xB9919C31, |
||||
0xBCDE8AB4, 0xBD1CE083, 0xBF5A5EDA, 0xBE9834ED, |
||||
/* T8_3 */ |
||||
0x00000000, 0xB8BC6765, 0xAA09C88B, 0x12B5AFEE, |
||||
0x8F629757, 0x37DEF032, 0x256B5FDC, 0x9DD738B9, |
||||
0xC5B428EF, 0x7D084F8A, 0x6FBDE064, 0xD7018701, |
||||
0x4AD6BFB8, 0xF26AD8DD, 0xE0DF7733, 0x58631056, |
||||
0x5019579F, 0xE8A530FA, 0xFA109F14, 0x42ACF871, |
||||
0xDF7BC0C8, 0x67C7A7AD, 0x75720843, 0xCDCE6F26, |
||||
0x95AD7F70, 0x2D111815, 0x3FA4B7FB, 0x8718D09E, |
||||
0x1ACFE827, 0xA2738F42, 0xB0C620AC, 0x087A47C9, |
||||
0xA032AF3E, 0x188EC85B, 0x0A3B67B5, 0xB28700D0, |
||||
0x2F503869, 0x97EC5F0C, 0x8559F0E2, 0x3DE59787, |
||||
0x658687D1, 0xDD3AE0B4, 0xCF8F4F5A, 0x7733283F, |
||||
0xEAE41086, 0x525877E3, 0x40EDD80D, 0xF851BF68, |
||||
0xF02BF8A1, 0x48979FC4, 0x5A22302A, 0xE29E574F, |
||||
0x7F496FF6, 0xC7F50893, 0xD540A77D, 0x6DFCC018, |
||||
0x359FD04E, 0x8D23B72B, 0x9F9618C5, 0x272A7FA0, |
||||
0xBAFD4719, 0x0241207C, 0x10F48F92, 0xA848E8F7, |
||||
0x9B14583D, 0x23A83F58, 0x311D90B6, 0x89A1F7D3, |
||||
0x1476CF6A, 0xACCAA80F, 0xBE7F07E1, 0x06C36084, |
||||
0x5EA070D2, 0xE61C17B7, 0xF4A9B859, 0x4C15DF3C, |
||||
0xD1C2E785, 0x697E80E0, 0x7BCB2F0E, 0xC377486B, |
||||
0xCB0D0FA2, 0x73B168C7, 0x6104C729, 0xD9B8A04C, |
||||
0x446F98F5, 0xFCD3FF90, 0xEE66507E, 0x56DA371B, |
||||
0x0EB9274D, 0xB6054028, 0xA4B0EFC6, 0x1C0C88A3, |
||||
0x81DBB01A, 0x3967D77F, 0x2BD27891, 0x936E1FF4, |
||||
0x3B26F703, 0x839A9066, 0x912F3F88, 0x299358ED, |
||||
0xB4446054, 0x0CF80731, 0x1E4DA8DF, 0xA6F1CFBA, |
||||
0xFE92DFEC, 0x462EB889, 0x549B1767, 0xEC277002, |
||||
0x71F048BB, 0xC94C2FDE, 0xDBF98030, 0x6345E755, |
||||
0x6B3FA09C, 0xD383C7F9, 0xC1366817, 0x798A0F72, |
||||
0xE45D37CB, 0x5CE150AE, 0x4E54FF40, 0xF6E89825, |
||||
0xAE8B8873, 0x1637EF16, 0x048240F8, 0xBC3E279D, |
||||
0x21E91F24, 0x99557841, 0x8BE0D7AF, 0x335CB0CA, |
||||
0xED59B63B, 0x55E5D15E, 0x47507EB0, 0xFFEC19D5, |
||||
0x623B216C, 0xDA874609, 0xC832E9E7, 0x708E8E82, |
||||
0x28ED9ED4, 0x9051F9B1, 0x82E4565F, 0x3A58313A, |
||||
0xA78F0983, 0x1F336EE6, 0x0D86C108, 0xB53AA66D, |
||||
0xBD40E1A4, 0x05FC86C1, 0x1749292F, 0xAFF54E4A, |
||||
0x322276F3, 0x8A9E1196, 0x982BBE78, 0x2097D91D, |
||||
0x78F4C94B, 0xC048AE2E, 0xD2FD01C0, 0x6A4166A5, |
||||
0xF7965E1C, 0x4F2A3979, 0x5D9F9697, 0xE523F1F2, |
||||
0x4D6B1905, 0xF5D77E60, 0xE762D18E, 0x5FDEB6EB, |
||||
0xC2098E52, 0x7AB5E937, 0x680046D9, 0xD0BC21BC, |
||||
0x88DF31EA, 0x3063568F, 0x22D6F961, 0x9A6A9E04, |
||||
0x07BDA6BD, 0xBF01C1D8, 0xADB46E36, 0x15080953, |
||||
0x1D724E9A, 0xA5CE29FF, 0xB77B8611, 0x0FC7E174, |
||||
0x9210D9CD, 0x2AACBEA8, 0x38191146, 0x80A57623, |
||||
0xD8C66675, 0x607A0110, 0x72CFAEFE, 0xCA73C99B, |
||||
0x57A4F122, 0xEF189647, 0xFDAD39A9, 0x45115ECC, |
||||
0x764DEE06, 0xCEF18963, 0xDC44268D, 0x64F841E8, |
||||
0xF92F7951, 0x41931E34, 0x5326B1DA, 0xEB9AD6BF, |
||||
0xB3F9C6E9, 0x0B45A18C, 0x19F00E62, 0xA14C6907, |
||||
0x3C9B51BE, 0x842736DB, 0x96929935, 0x2E2EFE50, |
||||
0x2654B999, 0x9EE8DEFC, 0x8C5D7112, 0x34E11677, |
||||
0xA9362ECE, 0x118A49AB, 0x033FE645, 0xBB838120, |
||||
0xE3E09176, 0x5B5CF613, 0x49E959FD, 0xF1553E98, |
||||
0x6C820621, 0xD43E6144, 0xC68BCEAA, 0x7E37A9CF, |
||||
0xD67F4138, 0x6EC3265D, 0x7C7689B3, 0xC4CAEED6, |
||||
0x591DD66F, 0xE1A1B10A, 0xF3141EE4, 0x4BA87981, |
||||
0x13CB69D7, 0xAB770EB2, 0xB9C2A15C, 0x017EC639, |
||||
0x9CA9FE80, 0x241599E5, 0x36A0360B, 0x8E1C516E, |
||||
0x866616A7, 0x3EDA71C2, 0x2C6FDE2C, 0x94D3B949, |
||||
0x090481F0, 0xB1B8E695, 0xA30D497B, 0x1BB12E1E, |
||||
0x43D23E48, 0xFB6E592D, 0xE9DBF6C3, 0x516791A6, |
||||
0xCCB0A91F, 0x740CCE7A, 0x66B96194, 0xDE0506F1, |
||||
/* T8_4 */ |
||||
0x00000000, 0x3D6029B0, 0x7AC05360, 0x47A07AD0, |
||||
0xF580A6C0, 0xC8E08F70, 0x8F40F5A0, 0xB220DC10, |
||||
0x30704BC1, 0x0D106271, 0x4AB018A1, 0x77D03111, |
||||
0xC5F0ED01, 0xF890C4B1, 0xBF30BE61, 0x825097D1, |
||||
0x60E09782, 0x5D80BE32, 0x1A20C4E2, 0x2740ED52, |
||||
0x95603142, 0xA80018F2, 0xEFA06222, 0xD2C04B92, |
||||
0x5090DC43, 0x6DF0F5F3, 0x2A508F23, 0x1730A693, |
||||
0xA5107A83, 0x98705333, 0xDFD029E3, 0xE2B00053, |
||||
0xC1C12F04, 0xFCA106B4, 0xBB017C64, 0x866155D4, |
||||
0x344189C4, 0x0921A074, 0x4E81DAA4, 0x73E1F314, |
||||
0xF1B164C5, 0xCCD14D75, 0x8B7137A5, 0xB6111E15, |
||||
0x0431C205, 0x3951EBB5, 0x7EF19165, 0x4391B8D5, |
||||
0xA121B886, 0x9C419136, 0xDBE1EBE6, 0xE681C256, |
||||
0x54A11E46, 0x69C137F6, 0x2E614D26, 0x13016496, |
||||
0x9151F347, 0xAC31DAF7, 0xEB91A027, 0xD6F18997, |
||||
0x64D15587, 0x59B17C37, 0x1E1106E7, 0x23712F57, |
||||
0x58F35849, 0x659371F9, 0x22330B29, 0x1F532299, |
||||
0xAD73FE89, 0x9013D739, 0xD7B3ADE9, 0xEAD38459, |
||||
0x68831388, 0x55E33A38, 0x124340E8, 0x2F236958, |
||||
0x9D03B548, 0xA0639CF8, 0xE7C3E628, 0xDAA3CF98, |
||||
0x3813CFCB, 0x0573E67B, 0x42D39CAB, 0x7FB3B51B, |
||||
0xCD93690B, 0xF0F340BB, 0xB7533A6B, 0x8A3313DB, |
||||
0x0863840A, 0x3503ADBA, 0x72A3D76A, 0x4FC3FEDA, |
||||
0xFDE322CA, 0xC0830B7A, 0x872371AA, 0xBA43581A, |
||||
0x9932774D, 0xA4525EFD, 0xE3F2242D, 0xDE920D9D, |
||||
0x6CB2D18D, 0x51D2F83D, 0x167282ED, 0x2B12AB5D, |
||||
0xA9423C8C, 0x9422153C, 0xD3826FEC, 0xEEE2465C, |
||||
0x5CC29A4C, 0x61A2B3FC, 0x2602C92C, 0x1B62E09C, |
||||
0xF9D2E0CF, 0xC4B2C97F, 0x8312B3AF, 0xBE729A1F, |
||||
0x0C52460F, 0x31326FBF, 0x7692156F, 0x4BF23CDF, |
||||
0xC9A2AB0E, 0xF4C282BE, 0xB362F86E, 0x8E02D1DE, |
||||
0x3C220DCE, 0x0142247E, 0x46E25EAE, 0x7B82771E, |
||||
0xB1E6B092, 0x8C869922, 0xCB26E3F2, 0xF646CA42, |
||||
0x44661652, 0x79063FE2, 0x3EA64532, 0x03C66C82, |
||||
0x8196FB53, 0xBCF6D2E3, 0xFB56A833, 0xC6368183, |
||||
0x74165D93, 0x49767423, 0x0ED60EF3, 0x33B62743, |
||||
0xD1062710, 0xEC660EA0, 0xABC67470, 0x96A65DC0, |
||||
0x248681D0, 0x19E6A860, 0x5E46D2B0, 0x6326FB00, |
||||
0xE1766CD1, 0xDC164561, 0x9BB63FB1, 0xA6D61601, |
||||
0x14F6CA11, 0x2996E3A1, 0x6E369971, 0x5356B0C1, |
||||
0x70279F96, 0x4D47B626, 0x0AE7CCF6, 0x3787E546, |
||||
0x85A73956, 0xB8C710E6, 0xFF676A36, 0xC2074386, |
||||
0x4057D457, 0x7D37FDE7, 0x3A978737, 0x07F7AE87, |
||||
0xB5D77297, 0x88B75B27, 0xCF1721F7, 0xF2770847, |
||||
0x10C70814, 0x2DA721A4, 0x6A075B74, 0x576772C4, |
||||
0xE547AED4, 0xD8278764, 0x9F87FDB4, 0xA2E7D404, |
||||
0x20B743D5, 0x1DD76A65, 0x5A7710B5, 0x67173905, |
||||
0xD537E515, 0xE857CCA5, 0xAFF7B675, 0x92979FC5, |
||||
0xE915E8DB, 0xD475C16B, 0x93D5BBBB, 0xAEB5920B, |
||||
0x1C954E1B, 0x21F567AB, 0x66551D7B, 0x5B3534CB, |
||||
0xD965A31A, 0xE4058AAA, 0xA3A5F07A, 0x9EC5D9CA, |
||||
0x2CE505DA, 0x11852C6A, 0x562556BA, 0x6B457F0A, |
||||
0x89F57F59, 0xB49556E9, 0xF3352C39, 0xCE550589, |
||||
0x7C75D999, 0x4115F029, 0x06B58AF9, 0x3BD5A349, |
||||
0xB9853498, 0x84E51D28, 0xC34567F8, 0xFE254E48, |
||||
0x4C059258, 0x7165BBE8, 0x36C5C138, 0x0BA5E888, |
||||
0x28D4C7DF, 0x15B4EE6F, 0x521494BF, 0x6F74BD0F, |
||||
0xDD54611F, 0xE03448AF, 0xA794327F, 0x9AF41BCF, |
||||
0x18A48C1E, 0x25C4A5AE, 0x6264DF7E, 0x5F04F6CE, |
||||
0xED242ADE, 0xD044036E, 0x97E479BE, 0xAA84500E, |
||||
0x4834505D, 0x755479ED, 0x32F4033D, 0x0F942A8D, |
||||
0xBDB4F69D, 0x80D4DF2D, 0xC774A5FD, 0xFA148C4D, |
||||
0x78441B9C, 0x4524322C, 0x028448FC, 0x3FE4614C, |
||||
0x8DC4BD5C, 0xB0A494EC, 0xF704EE3C, 0xCA64C78C, |
||||
/* T8_5 */ |
||||
0x00000000, 0xCB5CD3A5, 0x4DC8A10B, 0x869472AE, |
||||
0x9B914216, 0x50CD91B3, 0xD659E31D, 0x1D0530B8, |
||||
0xEC53826D, 0x270F51C8, 0xA19B2366, 0x6AC7F0C3, |
||||
0x77C2C07B, 0xBC9E13DE, 0x3A0A6170, 0xF156B2D5, |
||||
0x03D6029B, 0xC88AD13E, 0x4E1EA390, 0x85427035, |
||||
0x9847408D, 0x531B9328, 0xD58FE186, 0x1ED33223, |
||||
0xEF8580F6, 0x24D95353, 0xA24D21FD, 0x6911F258, |
||||
0x7414C2E0, 0xBF481145, 0x39DC63EB, 0xF280B04E, |
||||
0x07AC0536, 0xCCF0D693, 0x4A64A43D, 0x81387798, |
||||
0x9C3D4720, 0x57619485, 0xD1F5E62B, 0x1AA9358E, |
||||
0xEBFF875B, 0x20A354FE, 0xA6372650, 0x6D6BF5F5, |
||||
0x706EC54D, 0xBB3216E8, 0x3DA66446, 0xF6FAB7E3, |
||||
0x047A07AD, 0xCF26D408, 0x49B2A6A6, 0x82EE7503, |
||||
0x9FEB45BB, 0x54B7961E, 0xD223E4B0, 0x197F3715, |
||||
0xE82985C0, 0x23755665, 0xA5E124CB, 0x6EBDF76E, |
||||
0x73B8C7D6, 0xB8E41473, 0x3E7066DD, 0xF52CB578, |
||||
0x0F580A6C, 0xC404D9C9, 0x4290AB67, 0x89CC78C2, |
||||
0x94C9487A, 0x5F959BDF, 0xD901E971, 0x125D3AD4, |
||||
0xE30B8801, 0x28575BA4, 0xAEC3290A, 0x659FFAAF, |
||||
0x789ACA17, 0xB3C619B2, 0x35526B1C, 0xFE0EB8B9, |
||||
0x0C8E08F7, 0xC7D2DB52, 0x4146A9FC, 0x8A1A7A59, |
||||
0x971F4AE1, 0x5C439944, 0xDAD7EBEA, 0x118B384F, |
||||
0xE0DD8A9A, 0x2B81593F, 0xAD152B91, 0x6649F834, |
||||
0x7B4CC88C, 0xB0101B29, 0x36846987, 0xFDD8BA22, |
||||
0x08F40F5A, 0xC3A8DCFF, 0x453CAE51, 0x8E607DF4, |
||||
0x93654D4C, 0x58399EE9, 0xDEADEC47, 0x15F13FE2, |
||||
0xE4A78D37, 0x2FFB5E92, 0xA96F2C3C, 0x6233FF99, |
||||
0x7F36CF21, 0xB46A1C84, 0x32FE6E2A, 0xF9A2BD8F, |
||||
0x0B220DC1, 0xC07EDE64, 0x46EAACCA, 0x8DB67F6F, |
||||
0x90B34FD7, 0x5BEF9C72, 0xDD7BEEDC, 0x16273D79, |
||||
0xE7718FAC, 0x2C2D5C09, 0xAAB92EA7, 0x61E5FD02, |
||||
0x7CE0CDBA, 0xB7BC1E1F, 0x31286CB1, 0xFA74BF14, |
||||
0x1EB014D8, 0xD5ECC77D, 0x5378B5D3, 0x98246676, |
||||
0x852156CE, 0x4E7D856B, 0xC8E9F7C5, 0x03B52460, |
||||
0xF2E396B5, 0x39BF4510, 0xBF2B37BE, 0x7477E41B, |
||||
0x6972D4A3, 0xA22E0706, 0x24BA75A8, 0xEFE6A60D, |
||||
0x1D661643, 0xD63AC5E6, 0x50AEB748, 0x9BF264ED, |
||||
0x86F75455, 0x4DAB87F0, 0xCB3FF55E, 0x006326FB, |
||||
0xF135942E, 0x3A69478B, 0xBCFD3525, 0x77A1E680, |
||||
0x6AA4D638, 0xA1F8059D, 0x276C7733, 0xEC30A496, |
||||
0x191C11EE, 0xD240C24B, 0x54D4B0E5, 0x9F886340, |
||||
0x828D53F8, 0x49D1805D, 0xCF45F2F3, 0x04192156, |
||||
0xF54F9383, 0x3E134026, 0xB8873288, 0x73DBE12D, |
||||
0x6EDED195, 0xA5820230, 0x2316709E, 0xE84AA33B, |
||||
0x1ACA1375, 0xD196C0D0, 0x5702B27E, 0x9C5E61DB, |
||||
0x815B5163, 0x4A0782C6, 0xCC93F068, 0x07CF23CD, |
||||
0xF6999118, 0x3DC542BD, 0xBB513013, 0x700DE3B6, |
||||
0x6D08D30E, 0xA65400AB, 0x20C07205, 0xEB9CA1A0, |
||||
0x11E81EB4, 0xDAB4CD11, 0x5C20BFBF, 0x977C6C1A, |
||||
0x8A795CA2, 0x41258F07, 0xC7B1FDA9, 0x0CED2E0C, |
||||
0xFDBB9CD9, 0x36E74F7C, 0xB0733DD2, 0x7B2FEE77, |
||||
0x662ADECF, 0xAD760D6A, 0x2BE27FC4, 0xE0BEAC61, |
||||
0x123E1C2F, 0xD962CF8A, 0x5FF6BD24, 0x94AA6E81, |
||||
0x89AF5E39, 0x42F38D9C, 0xC467FF32, 0x0F3B2C97, |
||||
0xFE6D9E42, 0x35314DE7, 0xB3A53F49, 0x78F9ECEC, |
||||
0x65FCDC54, 0xAEA00FF1, 0x28347D5F, 0xE368AEFA, |
||||
0x16441B82, 0xDD18C827, 0x5B8CBA89, 0x90D0692C, |
||||
0x8DD55994, 0x46898A31, 0xC01DF89F, 0x0B412B3A, |
||||
0xFA1799EF, 0x314B4A4A, 0xB7DF38E4, 0x7C83EB41, |
||||
0x6186DBF9, 0xAADA085C, 0x2C4E7AF2, 0xE712A957, |
||||
0x15921919, 0xDECECABC, 0x585AB812, 0x93066BB7, |
||||
0x8E035B0F, 0x455F88AA, 0xC3CBFA04, 0x089729A1, |
||||
0xF9C19B74, 0x329D48D1, 0xB4093A7F, 0x7F55E9DA, |
||||
0x6250D962, 0xA90C0AC7, 0x2F987869, 0xE4C4ABCC, |
||||
/* T8_6 */ |
||||
0x00000000, 0xA6770BB4, 0x979F1129, 0x31E81A9D, |
||||
0xF44F2413, 0x52382FA7, 0x63D0353A, 0xC5A73E8E, |
||||
0x33EF4E67, 0x959845D3, 0xA4705F4E, 0x020754FA, |
||||
0xC7A06A74, 0x61D761C0, 0x503F7B5D, 0xF64870E9, |
||||
0x67DE9CCE, 0xC1A9977A, 0xF0418DE7, 0x56368653, |
||||
0x9391B8DD, 0x35E6B369, 0x040EA9F4, 0xA279A240, |
||||
0x5431D2A9, 0xF246D91D, 0xC3AEC380, 0x65D9C834, |
||||
0xA07EF6BA, 0x0609FD0E, 0x37E1E793, 0x9196EC27, |
||||
0xCFBD399C, 0x69CA3228, 0x582228B5, 0xFE552301, |
||||
0x3BF21D8F, 0x9D85163B, 0xAC6D0CA6, 0x0A1A0712, |
||||
0xFC5277FB, 0x5A257C4F, 0x6BCD66D2, 0xCDBA6D66, |
||||
0x081D53E8, 0xAE6A585C, 0x9F8242C1, 0x39F54975, |
||||
0xA863A552, 0x0E14AEE6, 0x3FFCB47B, 0x998BBFCF, |
||||
0x5C2C8141, 0xFA5B8AF5, 0xCBB39068, 0x6DC49BDC, |
||||
0x9B8CEB35, 0x3DFBE081, 0x0C13FA1C, 0xAA64F1A8, |
||||
0x6FC3CF26, 0xC9B4C492, 0xF85CDE0F, 0x5E2BD5BB, |
||||
0x440B7579, 0xE27C7ECD, 0xD3946450, 0x75E36FE4, |
||||
0xB044516A, 0x16335ADE, 0x27DB4043, 0x81AC4BF7, |
||||
0x77E43B1E, 0xD19330AA, 0xE07B2A37, 0x460C2183, |
||||
0x83AB1F0D, 0x25DC14B9, 0x14340E24, 0xB2430590, |
||||
0x23D5E9B7, 0x85A2E203, 0xB44AF89E, 0x123DF32A, |
||||
0xD79ACDA4, 0x71EDC610, 0x4005DC8D, 0xE672D739, |
||||
0x103AA7D0, 0xB64DAC64, 0x87A5B6F9, 0x21D2BD4D, |
||||
0xE47583C3, 0x42028877, 0x73EA92EA, 0xD59D995E, |
||||
0x8BB64CE5, 0x2DC14751, 0x1C295DCC, 0xBA5E5678, |
||||
0x7FF968F6, 0xD98E6342, 0xE86679DF, 0x4E11726B, |
||||
0xB8590282, 0x1E2E0936, 0x2FC613AB, 0x89B1181F, |
||||
0x4C162691, 0xEA612D25, 0xDB8937B8, 0x7DFE3C0C, |
||||
0xEC68D02B, 0x4A1FDB9F, 0x7BF7C102, 0xDD80CAB6, |
||||
0x1827F438, 0xBE50FF8C, 0x8FB8E511, 0x29CFEEA5, |
||||
0xDF879E4C, 0x79F095F8, 0x48188F65, 0xEE6F84D1, |
||||
0x2BC8BA5F, 0x8DBFB1EB, 0xBC57AB76, 0x1A20A0C2, |
||||
0x8816EAF2, 0x2E61E146, 0x1F89FBDB, 0xB9FEF06F, |
||||
0x7C59CEE1, 0xDA2EC555, 0xEBC6DFC8, 0x4DB1D47C, |
||||
0xBBF9A495, 0x1D8EAF21, 0x2C66B5BC, 0x8A11BE08, |
||||
0x4FB68086, 0xE9C18B32, 0xD82991AF, 0x7E5E9A1B, |
||||
0xEFC8763C, 0x49BF7D88, 0x78576715, 0xDE206CA1, |
||||
0x1B87522F, 0xBDF0599B, 0x8C184306, 0x2A6F48B2, |
||||
0xDC27385B, 0x7A5033EF, 0x4BB82972, 0xEDCF22C6, |
||||
0x28681C48, 0x8E1F17FC, 0xBFF70D61, 0x198006D5, |
||||
0x47ABD36E, 0xE1DCD8DA, 0xD034C247, 0x7643C9F3, |
||||
0xB3E4F77D, 0x1593FCC9, 0x247BE654, 0x820CEDE0, |
||||
0x74449D09, 0xD23396BD, 0xE3DB8C20, 0x45AC8794, |
||||
0x800BB91A, 0x267CB2AE, 0x1794A833, 0xB1E3A387, |
||||
0x20754FA0, 0x86024414, 0xB7EA5E89, 0x119D553D, |
||||
0xD43A6BB3, 0x724D6007, 0x43A57A9A, 0xE5D2712E, |
||||
0x139A01C7, 0xB5ED0A73, 0x840510EE, 0x22721B5A, |
||||
0xE7D525D4, 0x41A22E60, 0x704A34FD, 0xD63D3F49, |
||||
0xCC1D9F8B, 0x6A6A943F, 0x5B828EA2, 0xFDF58516, |
||||
0x3852BB98, 0x9E25B02C, 0xAFCDAAB1, 0x09BAA105, |
||||
0xFFF2D1EC, 0x5985DA58, 0x686DC0C5, 0xCE1ACB71, |
||||
0x0BBDF5FF, 0xADCAFE4B, 0x9C22E4D6, 0x3A55EF62, |
||||
0xABC30345, 0x0DB408F1, 0x3C5C126C, 0x9A2B19D8, |
||||
0x5F8C2756, 0xF9FB2CE2, 0xC813367F, 0x6E643DCB, |
||||
0x982C4D22, 0x3E5B4696, 0x0FB35C0B, 0xA9C457BF, |
||||
0x6C636931, 0xCA146285, 0xFBFC7818, 0x5D8B73AC, |
||||
0x03A0A617, 0xA5D7ADA3, 0x943FB73E, 0x3248BC8A, |
||||
0xF7EF8204, 0x519889B0, 0x6070932D, 0xC6079899, |
||||
0x304FE870, 0x9638E3C4, 0xA7D0F959, 0x01A7F2ED, |
||||
0xC400CC63, 0x6277C7D7, 0x539FDD4A, 0xF5E8D6FE, |
||||
0x647E3AD9, 0xC209316D, 0xF3E12BF0, 0x55962044, |
||||
0x90311ECA, 0x3646157E, 0x07AE0FE3, 0xA1D90457, |
||||
0x579174BE, 0xF1E67F0A, 0xC00E6597, 0x66796E23, |
||||
0xA3DE50AD, 0x05A95B19, 0x34414184, 0x92364A30, |
||||
/* T8_7 */ |
||||
0x00000000, 0xCCAA009E, 0x4225077D, 0x8E8F07E3, |
||||
0x844A0EFA, 0x48E00E64, 0xC66F0987, 0x0AC50919, |
||||
0xD3E51BB5, 0x1F4F1B2B, 0x91C01CC8, 0x5D6A1C56, |
||||
0x57AF154F, 0x9B0515D1, 0x158A1232, 0xD92012AC, |
||||
0x7CBB312B, 0xB01131B5, 0x3E9E3656, 0xF23436C8, |
||||
0xF8F13FD1, 0x345B3F4F, 0xBAD438AC, 0x767E3832, |
||||
0xAF5E2A9E, 0x63F42A00, 0xED7B2DE3, 0x21D12D7D, |
||||
0x2B142464, 0xE7BE24FA, 0x69312319, 0xA59B2387, |
||||
0xF9766256, 0x35DC62C8, 0xBB53652B, 0x77F965B5, |
||||
0x7D3C6CAC, 0xB1966C32, 0x3F196BD1, 0xF3B36B4F, |
||||
0x2A9379E3, 0xE639797D, 0x68B67E9E, 0xA41C7E00, |
||||
0xAED97719, 0x62737787, 0xECFC7064, 0x205670FA, |
||||
0x85CD537D, 0x496753E3, 0xC7E85400, 0x0B42549E, |
||||
0x01875D87, 0xCD2D5D19, 0x43A25AFA, 0x8F085A64, |
||||
0x562848C8, 0x9A824856, 0x140D4FB5, 0xD8A74F2B, |
||||
0xD2624632, 0x1EC846AC, 0x9047414F, 0x5CED41D1, |
||||
0x299DC2ED, 0xE537C273, 0x6BB8C590, 0xA712C50E, |
||||
0xADD7CC17, 0x617DCC89, 0xEFF2CB6A, 0x2358CBF4, |
||||
0xFA78D958, 0x36D2D9C6, 0xB85DDE25, 0x74F7DEBB, |
||||
0x7E32D7A2, 0xB298D73C, 0x3C17D0DF, 0xF0BDD041, |
||||
0x5526F3C6, 0x998CF358, 0x1703F4BB, 0xDBA9F425, |
||||
0xD16CFD3C, 0x1DC6FDA2, 0x9349FA41, 0x5FE3FADF, |
||||
0x86C3E873, 0x4A69E8ED, 0xC4E6EF0E, 0x084CEF90, |
||||
0x0289E689, 0xCE23E617, 0x40ACE1F4, 0x8C06E16A, |
||||
0xD0EBA0BB, 0x1C41A025, 0x92CEA7C6, 0x5E64A758, |
||||
0x54A1AE41, 0x980BAEDF, 0x1684A93C, 0xDA2EA9A2, |
||||
0x030EBB0E, 0xCFA4BB90, 0x412BBC73, 0x8D81BCED, |
||||
0x8744B5F4, 0x4BEEB56A, 0xC561B289, 0x09CBB217, |
||||
0xAC509190, 0x60FA910E, 0xEE7596ED, 0x22DF9673, |
||||
0x281A9F6A, 0xE4B09FF4, 0x6A3F9817, 0xA6959889, |
||||
0x7FB58A25, 0xB31F8ABB, 0x3D908D58, 0xF13A8DC6, |
||||
0xFBFF84DF, 0x37558441, 0xB9DA83A2, 0x7570833C, |
||||
0x533B85DA, 0x9F918544, 0x111E82A7, 0xDDB48239, |
||||
0xD7718B20, 0x1BDB8BBE, 0x95548C5D, 0x59FE8CC3, |
||||
0x80DE9E6F, 0x4C749EF1, 0xC2FB9912, 0x0E51998C, |
||||
0x04949095, 0xC83E900B, 0x46B197E8, 0x8A1B9776, |
||||
0x2F80B4F1, 0xE32AB46F, 0x6DA5B38C, 0xA10FB312, |
||||
0xABCABA0B, 0x6760BA95, 0xE9EFBD76, 0x2545BDE8, |
||||
0xFC65AF44, 0x30CFAFDA, 0xBE40A839, 0x72EAA8A7, |
||||
0x782FA1BE, 0xB485A120, 0x3A0AA6C3, 0xF6A0A65D, |
||||
0xAA4DE78C, 0x66E7E712, 0xE868E0F1, 0x24C2E06F, |
||||
0x2E07E976, 0xE2ADE9E8, 0x6C22EE0B, 0xA088EE95, |
||||
0x79A8FC39, 0xB502FCA7, 0x3B8DFB44, 0xF727FBDA, |
||||
0xFDE2F2C3, 0x3148F25D, 0xBFC7F5BE, 0x736DF520, |
||||
0xD6F6D6A7, 0x1A5CD639, 0x94D3D1DA, 0x5879D144, |
||||
0x52BCD85D, 0x9E16D8C3, 0x1099DF20, 0xDC33DFBE, |
||||
0x0513CD12, 0xC9B9CD8C, 0x4736CA6F, 0x8B9CCAF1, |
||||
0x8159C3E8, 0x4DF3C376, 0xC37CC495, 0x0FD6C40B, |
||||
0x7AA64737, 0xB60C47A9, 0x3883404A, 0xF42940D4, |
||||
0xFEEC49CD, 0x32464953, 0xBCC94EB0, 0x70634E2E, |
||||
0xA9435C82, 0x65E95C1C, 0xEB665BFF, 0x27CC5B61, |
||||
0x2D095278, 0xE1A352E6, 0x6F2C5505, 0xA386559B, |
||||
0x061D761C, 0xCAB77682, 0x44387161, 0x889271FF, |
||||
0x825778E6, 0x4EFD7878, 0xC0727F9B, 0x0CD87F05, |
||||
0xD5F86DA9, 0x19526D37, 0x97DD6AD4, 0x5B776A4A, |
||||
0x51B26353, 0x9D1863CD, 0x1397642E, 0xDF3D64B0, |
||||
0x83D02561, 0x4F7A25FF, 0xC1F5221C, 0x0D5F2282, |
||||
0x079A2B9B, 0xCB302B05, 0x45BF2CE6, 0x89152C78, |
||||
0x50353ED4, 0x9C9F3E4A, 0x121039A9, 0xDEBA3937, |
||||
0xD47F302E, 0x18D530B0, 0x965A3753, 0x5AF037CD, |
||||
0xFF6B144A, 0x33C114D4, 0xBD4E1337, 0x71E413A9, |
||||
0x7B211AB0, 0xB78B1A2E, 0x39041DCD, 0xF5AE1D53, |
||||
0x2C8E0FFF, 0xE0240F61, 0x6EAB0882, 0xA201081C, |
||||
0xA8C40105, 0x646E019B, 0xEAE10678, 0x264B06E6 |
||||
}; |
||||
} |
@ -0,0 +1,640 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
/* |
||||
* Some portions of this file Copyright (c) 2004-2006 Intel Corporation |
||||
* and licensed under the BSD license. |
||||
*/ |
||||
package com.fr.third.org.apache.commons.codec.digest; |
||||
|
||||
import java.util.zip.Checksum; |
||||
|
||||
/** |
||||
* A pure-java implementation of the CRC32 checksum that uses |
||||
* the CRC32-C polynomial, the same polynomial used by iSCSI |
||||
* and implemented on many Intel chipsets supporting SSE4.2. |
||||
* |
||||
* Copied from Hadoop 2.3.6: |
||||
* https://git-wip-us.apache.org/repos/asf?p=hadoop.git;a=blob_plain;
|
||||
* f=hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PureJavaCrc32C.java; |
||||
* hb=2120de588b92b9f22b1cc4188761d6a8c61aa778 |
||||
* <p> |
||||
* This class is not thread-safe. |
||||
* @since 1.11 |
||||
*/ |
||||
public class PureJavaCrc32C implements Checksum { |
||||
|
||||
/** the current CRC value, bit-flipped */ |
||||
private int crc; |
||||
|
||||
/**
 * Creates a new PureJavaCrc32C object whose checksum is in its initial state.
 */
public PureJavaCrc32C() {
    // Initialise the field directly rather than through reset(): reset() is
    // public and overridable, and a constructor must not call an overridable
    // method because a subclass override would run before the subclass has
    // been initialised.
    crc = 0xffffffff;
}
||||
|
||||
@Override |
||||
public long getValue() { |
||||
final long ret = crc; |
||||
return (~ret) & 0xffffffffL; |
||||
} |
||||
|
||||
@Override |
||||
public void reset() { |
||||
crc = 0xffffffff; |
||||
} |
||||
|
||||
@Override |
||||
public void update(final byte[] b, int off, int len) { |
||||
int localCrc = crc; |
||||
|
||||
while(len > 7) { |
||||
final int c0 =(b[off+0] ^ localCrc) & 0xff; |
||||
final int c1 =(b[off+1] ^ (localCrc >>>= 8)) & 0xff; |
||||
final int c2 =(b[off+2] ^ (localCrc >>>= 8)) & 0xff; |
||||
final int c3 =(b[off+3] ^ (localCrc >>>= 8)) & 0xff; |
||||
localCrc = (T[T8_7_start + c0] ^ T[T8_6_start + c1]) ^ |
||||
(T[T8_5_start + c2] ^ T[T8_4_start + c3]); |
||||
|
||||
final int c4 = b[off+4] & 0xff; |
||||
final int c5 = b[off+5] & 0xff; |
||||
final int c6 = b[off+6] & 0xff; |
||||
final int c7 = b[off+7] & 0xff; |
||||
|
||||
localCrc ^= (T[T8_3_start + c4] ^ T[T8_2_start + c5]) ^ |
||||
(T[T8_1_start + c6] ^ T[T8_0_start + c7]); |
||||
|
||||
off += 8; |
||||
len -= 8; |
||||
} |
||||
|
||||
/* loop unroll - duff's device style */ |
||||
switch(len) { |
||||
case 7: localCrc = (localCrc >>> 8) ^ T[T8_0_start + ((localCrc ^ b[off++]) & 0xff)]; |
||||
case 6: localCrc = (localCrc >>> 8) ^ T[T8_0_start + ((localCrc ^ b[off++]) & 0xff)]; |
||||
case 5: localCrc = (localCrc >>> 8) ^ T[T8_0_start + ((localCrc ^ b[off++]) & 0xff)]; |
||||
case 4: localCrc = (localCrc >>> 8) ^ T[T8_0_start + ((localCrc ^ b[off++]) & 0xff)]; |
||||
case 3: localCrc = (localCrc >>> 8) ^ T[T8_0_start + ((localCrc ^ b[off++]) & 0xff)]; |
||||
case 2: localCrc = (localCrc >>> 8) ^ T[T8_0_start + ((localCrc ^ b[off++]) & 0xff)]; |
||||
case 1: localCrc = (localCrc >>> 8) ^ T[T8_0_start + ((localCrc ^ b[off++]) & 0xff)]; |
||||
default: |
||||
break; // satisfy Findbugs
|
||||
} |
||||
|
||||
// Publish crc out to object
|
||||
crc = localCrc; |
||||
} |
||||
|
||||
@Override |
||||
final public void update(final int b) { |
||||
crc = (crc >>> 8) ^ T[T8_0_start + ((crc ^ b) & 0xff)]; |
||||
} |
||||
|
||||
// CRC polynomial tables generated by:
|
||||
// java -cp build/test/classes/:build/classes/ \
|
||||
// org.apache.hadoop.util.TestPureJavaCrc32\$Table 82F63B78
|
||||
|
||||
private static final int T8_0_start = 0*256; |
||||
private static final int T8_1_start = 1*256; |
||||
private static final int T8_2_start = 2*256; |
||||
private static final int T8_3_start = 3*256; |
||||
private static final int T8_4_start = 4*256; |
||||
private static final int T8_5_start = 5*256; |
||||
private static final int T8_6_start = 6*256; |
||||
private static final int T8_7_start = 7*256; |
||||
|
||||
private static final int[] T = new int[] { |
||||
/* T8_0 */ |
||||
0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, |
||||
0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB, |
||||
0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, |
||||
0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24, |
||||
0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B, |
||||
0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384, |
||||
0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, |
||||
0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B, |
||||
0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A, |
||||
0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35, |
||||
0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5, |
||||
0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA, |
||||
0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, |
||||
0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A, |
||||
0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A, |
||||
0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595, |
||||
0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48, |
||||
0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957, |
||||
0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, |
||||
0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198, |
||||
0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927, |
||||
0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38, |
||||
0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8, |
||||
0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7, |
||||
0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, |
||||
0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789, |
||||
0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859, |
||||
0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46, |
||||
0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9, |
||||
0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6, |
||||
0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, |
||||
0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829, |
||||
0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C, |
||||
0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93, |
||||
0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043, |
||||
0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C, |
||||
0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, |
||||
0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC, |
||||
0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C, |
||||
0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033, |
||||
0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652, |
||||
0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D, |
||||
0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D, |
||||
0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982, |
||||
0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D, |
||||
0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622, |
||||
0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2, |
||||
0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED, |
||||
0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, |
||||
0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F, |
||||
0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF, |
||||
0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0, |
||||
0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F, |
||||
0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540, |
||||
0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, |
||||
0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F, |
||||
0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE, |
||||
0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1, |
||||
0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321, |
||||
0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E, |
||||
0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, |
||||
0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E, |
||||
0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, |
||||
0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351, |
||||
/* T8_1 */ |
||||
0x00000000, 0x13A29877, 0x274530EE, 0x34E7A899, |
||||
0x4E8A61DC, 0x5D28F9AB, 0x69CF5132, 0x7A6DC945, |
||||
0x9D14C3B8, 0x8EB65BCF, 0xBA51F356, 0xA9F36B21, |
||||
0xD39EA264, 0xC03C3A13, 0xF4DB928A, 0xE7790AFD, |
||||
0x3FC5F181, 0x2C6769F6, 0x1880C16F, 0x0B225918, |
||||
0x714F905D, 0x62ED082A, 0x560AA0B3, 0x45A838C4, |
||||
0xA2D13239, 0xB173AA4E, 0x859402D7, 0x96369AA0, |
||||
0xEC5B53E5, 0xFFF9CB92, 0xCB1E630B, 0xD8BCFB7C, |
||||
0x7F8BE302, 0x6C297B75, 0x58CED3EC, 0x4B6C4B9B, |
||||
0x310182DE, 0x22A31AA9, 0x1644B230, 0x05E62A47, |
||||
0xE29F20BA, 0xF13DB8CD, 0xC5DA1054, 0xD6788823, |
||||
0xAC154166, 0xBFB7D911, 0x8B507188, 0x98F2E9FF, |
||||
0x404E1283, 0x53EC8AF4, 0x670B226D, 0x74A9BA1A, |
||||
0x0EC4735F, 0x1D66EB28, 0x298143B1, 0x3A23DBC6, |
||||
0xDD5AD13B, 0xCEF8494C, 0xFA1FE1D5, 0xE9BD79A2, |
||||
0x93D0B0E7, 0x80722890, 0xB4958009, 0xA737187E, |
||||
0xFF17C604, 0xECB55E73, 0xD852F6EA, 0xCBF06E9D, |
||||
0xB19DA7D8, 0xA23F3FAF, 0x96D89736, 0x857A0F41, |
||||
0x620305BC, 0x71A19DCB, 0x45463552, 0x56E4AD25, |
||||
0x2C896460, 0x3F2BFC17, 0x0BCC548E, 0x186ECCF9, |
||||
0xC0D23785, 0xD370AFF2, 0xE797076B, 0xF4359F1C, |
||||
0x8E585659, 0x9DFACE2E, 0xA91D66B7, 0xBABFFEC0, |
||||
0x5DC6F43D, 0x4E646C4A, 0x7A83C4D3, 0x69215CA4, |
||||
0x134C95E1, 0x00EE0D96, 0x3409A50F, 0x27AB3D78, |
||||
0x809C2506, 0x933EBD71, 0xA7D915E8, 0xB47B8D9F, |
||||
0xCE1644DA, 0xDDB4DCAD, 0xE9537434, 0xFAF1EC43, |
||||
0x1D88E6BE, 0x0E2A7EC9, 0x3ACDD650, 0x296F4E27, |
||||
0x53028762, 0x40A01F15, 0x7447B78C, 0x67E52FFB, |
||||
0xBF59D487, 0xACFB4CF0, 0x981CE469, 0x8BBE7C1E, |
||||
0xF1D3B55B, 0xE2712D2C, 0xD69685B5, 0xC5341DC2, |
||||
0x224D173F, 0x31EF8F48, 0x050827D1, 0x16AABFA6, |
||||
0x6CC776E3, 0x7F65EE94, 0x4B82460D, 0x5820DE7A, |
||||
0xFBC3FAF9, 0xE861628E, 0xDC86CA17, 0xCF245260, |
||||
0xB5499B25, 0xA6EB0352, 0x920CABCB, 0x81AE33BC, |
||||
0x66D73941, 0x7575A136, 0x419209AF, 0x523091D8, |
||||
0x285D589D, 0x3BFFC0EA, 0x0F186873, 0x1CBAF004, |
||||
0xC4060B78, 0xD7A4930F, 0xE3433B96, 0xF0E1A3E1, |
||||
0x8A8C6AA4, 0x992EF2D3, 0xADC95A4A, 0xBE6BC23D, |
||||
0x5912C8C0, 0x4AB050B7, 0x7E57F82E, 0x6DF56059, |
||||
0x1798A91C, 0x043A316B, 0x30DD99F2, 0x237F0185, |
||||
0x844819FB, 0x97EA818C, 0xA30D2915, 0xB0AFB162, |
||||
0xCAC27827, 0xD960E050, 0xED8748C9, 0xFE25D0BE, |
||||
0x195CDA43, 0x0AFE4234, 0x3E19EAAD, 0x2DBB72DA, |
||||
0x57D6BB9F, 0x447423E8, 0x70938B71, 0x63311306, |
||||
0xBB8DE87A, 0xA82F700D, 0x9CC8D894, 0x8F6A40E3, |
||||
0xF50789A6, 0xE6A511D1, 0xD242B948, 0xC1E0213F, |
||||
0x26992BC2, 0x353BB3B5, 0x01DC1B2C, 0x127E835B, |
||||
0x68134A1E, 0x7BB1D269, 0x4F567AF0, 0x5CF4E287, |
||||
0x04D43CFD, 0x1776A48A, 0x23910C13, 0x30339464, |
||||
0x4A5E5D21, 0x59FCC556, 0x6D1B6DCF, 0x7EB9F5B8, |
||||
0x99C0FF45, 0x8A626732, 0xBE85CFAB, 0xAD2757DC, |
||||
0xD74A9E99, 0xC4E806EE, 0xF00FAE77, 0xE3AD3600, |
||||
0x3B11CD7C, 0x28B3550B, 0x1C54FD92, 0x0FF665E5, |
||||
0x759BACA0, 0x663934D7, 0x52DE9C4E, 0x417C0439, |
||||
0xA6050EC4, 0xB5A796B3, 0x81403E2A, 0x92E2A65D, |
||||
0xE88F6F18, 0xFB2DF76F, 0xCFCA5FF6, 0xDC68C781, |
||||
0x7B5FDFFF, 0x68FD4788, 0x5C1AEF11, 0x4FB87766, |
||||
0x35D5BE23, 0x26772654, 0x12908ECD, 0x013216BA, |
||||
0xE64B1C47, 0xF5E98430, 0xC10E2CA9, 0xD2ACB4DE, |
||||
0xA8C17D9B, 0xBB63E5EC, 0x8F844D75, 0x9C26D502, |
||||
0x449A2E7E, 0x5738B609, 0x63DF1E90, 0x707D86E7, |
||||
0x0A104FA2, 0x19B2D7D5, 0x2D557F4C, 0x3EF7E73B, |
||||
0xD98EEDC6, 0xCA2C75B1, 0xFECBDD28, 0xED69455F, |
||||
0x97048C1A, 0x84A6146D, 0xB041BCF4, 0xA3E32483, |
||||
/* T8_2 */ |
||||
0x00000000, 0xA541927E, 0x4F6F520D, 0xEA2EC073, |
||||
0x9EDEA41A, 0x3B9F3664, 0xD1B1F617, 0x74F06469, |
||||
0x38513EC5, 0x9D10ACBB, 0x773E6CC8, 0xD27FFEB6, |
||||
0xA68F9ADF, 0x03CE08A1, 0xE9E0C8D2, 0x4CA15AAC, |
||||
0x70A27D8A, 0xD5E3EFF4, 0x3FCD2F87, 0x9A8CBDF9, |
||||
0xEE7CD990, 0x4B3D4BEE, 0xA1138B9D, 0x045219E3, |
||||
0x48F3434F, 0xEDB2D131, 0x079C1142, 0xA2DD833C, |
||||
0xD62DE755, 0x736C752B, 0x9942B558, 0x3C032726, |
||||
0xE144FB14, 0x4405696A, 0xAE2BA919, 0x0B6A3B67, |
||||
0x7F9A5F0E, 0xDADBCD70, 0x30F50D03, 0x95B49F7D, |
||||
0xD915C5D1, 0x7C5457AF, 0x967A97DC, 0x333B05A2, |
||||
0x47CB61CB, 0xE28AF3B5, 0x08A433C6, 0xADE5A1B8, |
||||
0x91E6869E, 0x34A714E0, 0xDE89D493, 0x7BC846ED, |
||||
0x0F382284, 0xAA79B0FA, 0x40577089, 0xE516E2F7, |
||||
0xA9B7B85B, 0x0CF62A25, 0xE6D8EA56, 0x43997828, |
||||
0x37691C41, 0x92288E3F, 0x78064E4C, 0xDD47DC32, |
||||
0xC76580D9, 0x622412A7, 0x880AD2D4, 0x2D4B40AA, |
||||
0x59BB24C3, 0xFCFAB6BD, 0x16D476CE, 0xB395E4B0, |
||||
0xFF34BE1C, 0x5A752C62, 0xB05BEC11, 0x151A7E6F, |
||||
0x61EA1A06, 0xC4AB8878, 0x2E85480B, 0x8BC4DA75, |
||||
0xB7C7FD53, 0x12866F2D, 0xF8A8AF5E, 0x5DE93D20, |
||||
0x29195949, 0x8C58CB37, 0x66760B44, 0xC337993A, |
||||
0x8F96C396, 0x2AD751E8, 0xC0F9919B, 0x65B803E5, |
||||
0x1148678C, 0xB409F5F2, 0x5E273581, 0xFB66A7FF, |
||||
0x26217BCD, 0x8360E9B3, 0x694E29C0, 0xCC0FBBBE, |
||||
0xB8FFDFD7, 0x1DBE4DA9, 0xF7908DDA, 0x52D11FA4, |
||||
0x1E704508, 0xBB31D776, 0x511F1705, 0xF45E857B, |
||||
0x80AEE112, 0x25EF736C, 0xCFC1B31F, 0x6A802161, |
||||
0x56830647, 0xF3C29439, 0x19EC544A, 0xBCADC634, |
||||
0xC85DA25D, 0x6D1C3023, 0x8732F050, 0x2273622E, |
||||
0x6ED23882, 0xCB93AAFC, 0x21BD6A8F, 0x84FCF8F1, |
||||
0xF00C9C98, 0x554D0EE6, 0xBF63CE95, 0x1A225CEB, |
||||
0x8B277743, 0x2E66E53D, 0xC448254E, 0x6109B730, |
||||
0x15F9D359, 0xB0B84127, 0x5A968154, 0xFFD7132A, |
||||
0xB3764986, 0x1637DBF8, 0xFC191B8B, 0x595889F5, |
||||
0x2DA8ED9C, 0x88E97FE2, 0x62C7BF91, 0xC7862DEF, |
||||
0xFB850AC9, 0x5EC498B7, 0xB4EA58C4, 0x11ABCABA, |
||||
0x655BAED3, 0xC01A3CAD, 0x2A34FCDE, 0x8F756EA0, |
||||
0xC3D4340C, 0x6695A672, 0x8CBB6601, 0x29FAF47F, |
||||
0x5D0A9016, 0xF84B0268, 0x1265C21B, 0xB7245065, |
||||
0x6A638C57, 0xCF221E29, 0x250CDE5A, 0x804D4C24, |
||||
0xF4BD284D, 0x51FCBA33, 0xBBD27A40, 0x1E93E83E, |
||||
0x5232B292, 0xF77320EC, 0x1D5DE09F, 0xB81C72E1, |
||||
0xCCEC1688, 0x69AD84F6, 0x83834485, 0x26C2D6FB, |
||||
0x1AC1F1DD, 0xBF8063A3, 0x55AEA3D0, 0xF0EF31AE, |
||||
0x841F55C7, 0x215EC7B9, 0xCB7007CA, 0x6E3195B4, |
||||
0x2290CF18, 0x87D15D66, 0x6DFF9D15, 0xC8BE0F6B, |
||||
0xBC4E6B02, 0x190FF97C, 0xF321390F, 0x5660AB71, |
||||
0x4C42F79A, 0xE90365E4, 0x032DA597, 0xA66C37E9, |
||||
0xD29C5380, 0x77DDC1FE, 0x9DF3018D, 0x38B293F3, |
||||
0x7413C95F, 0xD1525B21, 0x3B7C9B52, 0x9E3D092C, |
||||
0xEACD6D45, 0x4F8CFF3B, 0xA5A23F48, 0x00E3AD36, |
||||
0x3CE08A10, 0x99A1186E, 0x738FD81D, 0xD6CE4A63, |
||||
0xA23E2E0A, 0x077FBC74, 0xED517C07, 0x4810EE79, |
||||
0x04B1B4D5, 0xA1F026AB, 0x4BDEE6D8, 0xEE9F74A6, |
||||
0x9A6F10CF, 0x3F2E82B1, 0xD50042C2, 0x7041D0BC, |
||||
0xAD060C8E, 0x08479EF0, 0xE2695E83, 0x4728CCFD, |
||||
0x33D8A894, 0x96993AEA, 0x7CB7FA99, 0xD9F668E7, |
||||
0x9557324B, 0x3016A035, 0xDA386046, 0x7F79F238, |
||||
0x0B899651, 0xAEC8042F, 0x44E6C45C, 0xE1A75622, |
||||
0xDDA47104, 0x78E5E37A, 0x92CB2309, 0x378AB177, |
||||
0x437AD51E, 0xE63B4760, 0x0C158713, 0xA954156D, |
||||
0xE5F54FC1, 0x40B4DDBF, 0xAA9A1DCC, 0x0FDB8FB2, |
||||
0x7B2BEBDB, 0xDE6A79A5, 0x3444B9D6, 0x91052BA8, |
||||
/* T8_3 */ |
||||
0x00000000, 0xDD45AAB8, 0xBF672381, 0x62228939, |
||||
0x7B2231F3, 0xA6679B4B, 0xC4451272, 0x1900B8CA, |
||||
0xF64463E6, 0x2B01C95E, 0x49234067, 0x9466EADF, |
||||
0x8D665215, 0x5023F8AD, 0x32017194, 0xEF44DB2C, |
||||
0xE964B13D, 0x34211B85, 0x560392BC, 0x8B463804, |
||||
0x924680CE, 0x4F032A76, 0x2D21A34F, 0xF06409F7, |
||||
0x1F20D2DB, 0xC2657863, 0xA047F15A, 0x7D025BE2, |
||||
0x6402E328, 0xB9474990, 0xDB65C0A9, 0x06206A11, |
||||
0xD725148B, 0x0A60BE33, 0x6842370A, 0xB5079DB2, |
||||
0xAC072578, 0x71428FC0, 0x136006F9, 0xCE25AC41, |
||||
0x2161776D, 0xFC24DDD5, 0x9E0654EC, 0x4343FE54, |
||||
0x5A43469E, 0x8706EC26, 0xE524651F, 0x3861CFA7, |
||||
0x3E41A5B6, 0xE3040F0E, 0x81268637, 0x5C632C8F, |
||||
0x45639445, 0x98263EFD, 0xFA04B7C4, 0x27411D7C, |
||||
0xC805C650, 0x15406CE8, 0x7762E5D1, 0xAA274F69, |
||||
0xB327F7A3, 0x6E625D1B, 0x0C40D422, 0xD1057E9A, |
||||
0xABA65FE7, 0x76E3F55F, 0x14C17C66, 0xC984D6DE, |
||||
0xD0846E14, 0x0DC1C4AC, 0x6FE34D95, 0xB2A6E72D, |
||||
0x5DE23C01, 0x80A796B9, 0xE2851F80, 0x3FC0B538, |
||||
0x26C00DF2, 0xFB85A74A, 0x99A72E73, 0x44E284CB, |
||||
0x42C2EEDA, 0x9F874462, 0xFDA5CD5B, 0x20E067E3, |
||||
0x39E0DF29, 0xE4A57591, 0x8687FCA8, 0x5BC25610, |
||||
0xB4868D3C, 0x69C32784, 0x0BE1AEBD, 0xD6A40405, |
||||
0xCFA4BCCF, 0x12E11677, 0x70C39F4E, 0xAD8635F6, |
||||
0x7C834B6C, 0xA1C6E1D4, 0xC3E468ED, 0x1EA1C255, |
||||
0x07A17A9F, 0xDAE4D027, 0xB8C6591E, 0x6583F3A6, |
||||
0x8AC7288A, 0x57828232, 0x35A00B0B, 0xE8E5A1B3, |
||||
0xF1E51979, 0x2CA0B3C1, 0x4E823AF8, 0x93C79040, |
||||
0x95E7FA51, 0x48A250E9, 0x2A80D9D0, 0xF7C57368, |
||||
0xEEC5CBA2, 0x3380611A, 0x51A2E823, 0x8CE7429B, |
||||
0x63A399B7, 0xBEE6330F, 0xDCC4BA36, 0x0181108E, |
||||
0x1881A844, 0xC5C402FC, 0xA7E68BC5, 0x7AA3217D, |
||||
0x52A0C93F, 0x8FE56387, 0xEDC7EABE, 0x30824006, |
||||
0x2982F8CC, 0xF4C75274, 0x96E5DB4D, 0x4BA071F5, |
||||
0xA4E4AAD9, 0x79A10061, 0x1B838958, 0xC6C623E0, |
||||
0xDFC69B2A, 0x02833192, 0x60A1B8AB, 0xBDE41213, |
||||
0xBBC47802, 0x6681D2BA, 0x04A35B83, 0xD9E6F13B, |
||||
0xC0E649F1, 0x1DA3E349, 0x7F816A70, 0xA2C4C0C8, |
||||
0x4D801BE4, 0x90C5B15C, 0xF2E73865, 0x2FA292DD, |
||||
0x36A22A17, 0xEBE780AF, 0x89C50996, 0x5480A32E, |
||||
0x8585DDB4, 0x58C0770C, 0x3AE2FE35, 0xE7A7548D, |
||||
0xFEA7EC47, 0x23E246FF, 0x41C0CFC6, 0x9C85657E, |
||||
0x73C1BE52, 0xAE8414EA, 0xCCA69DD3, 0x11E3376B, |
||||
0x08E38FA1, 0xD5A62519, 0xB784AC20, 0x6AC10698, |
||||
0x6CE16C89, 0xB1A4C631, 0xD3864F08, 0x0EC3E5B0, |
||||
0x17C35D7A, 0xCA86F7C2, 0xA8A47EFB, 0x75E1D443, |
||||
0x9AA50F6F, 0x47E0A5D7, 0x25C22CEE, 0xF8878656, |
||||
0xE1873E9C, 0x3CC29424, 0x5EE01D1D, 0x83A5B7A5, |
||||
0xF90696D8, 0x24433C60, 0x4661B559, 0x9B241FE1, |
||||
0x8224A72B, 0x5F610D93, 0x3D4384AA, 0xE0062E12, |
||||
0x0F42F53E, 0xD2075F86, 0xB025D6BF, 0x6D607C07, |
||||
0x7460C4CD, 0xA9256E75, 0xCB07E74C, 0x16424DF4, |
||||
0x106227E5, 0xCD278D5D, 0xAF050464, 0x7240AEDC, |
||||
0x6B401616, 0xB605BCAE, 0xD4273597, 0x09629F2F, |
||||
0xE6264403, 0x3B63EEBB, 0x59416782, 0x8404CD3A, |
||||
0x9D0475F0, 0x4041DF48, 0x22635671, 0xFF26FCC9, |
||||
0x2E238253, 0xF36628EB, 0x9144A1D2, 0x4C010B6A, |
||||
0x5501B3A0, 0x88441918, 0xEA669021, 0x37233A99, |
||||
0xD867E1B5, 0x05224B0D, 0x6700C234, 0xBA45688C, |
||||
0xA345D046, 0x7E007AFE, 0x1C22F3C7, 0xC167597F, |
||||
0xC747336E, 0x1A0299D6, 0x782010EF, 0xA565BA57, |
||||
0xBC65029D, 0x6120A825, 0x0302211C, 0xDE478BA4, |
||||
0x31035088, 0xEC46FA30, 0x8E647309, 0x5321D9B1, |
||||
0x4A21617B, 0x9764CBC3, 0xF54642FA, 0x2803E842, |
||||
/* T8_4 */ |
||||
0x00000000, 0x38116FAC, 0x7022DF58, 0x4833B0F4, |
||||
0xE045BEB0, 0xD854D11C, 0x906761E8, 0xA8760E44, |
||||
0xC5670B91, 0xFD76643D, 0xB545D4C9, 0x8D54BB65, |
||||
0x2522B521, 0x1D33DA8D, 0x55006A79, 0x6D1105D5, |
||||
0x8F2261D3, 0xB7330E7F, 0xFF00BE8B, 0xC711D127, |
||||
0x6F67DF63, 0x5776B0CF, 0x1F45003B, 0x27546F97, |
||||
0x4A456A42, 0x725405EE, 0x3A67B51A, 0x0276DAB6, |
||||
0xAA00D4F2, 0x9211BB5E, 0xDA220BAA, 0xE2336406, |
||||
0x1BA8B557, 0x23B9DAFB, 0x6B8A6A0F, 0x539B05A3, |
||||
0xFBED0BE7, 0xC3FC644B, 0x8BCFD4BF, 0xB3DEBB13, |
||||
0xDECFBEC6, 0xE6DED16A, 0xAEED619E, 0x96FC0E32, |
||||
0x3E8A0076, 0x069B6FDA, 0x4EA8DF2E, 0x76B9B082, |
||||
0x948AD484, 0xAC9BBB28, 0xE4A80BDC, 0xDCB96470, |
||||
0x74CF6A34, 0x4CDE0598, 0x04EDB56C, 0x3CFCDAC0, |
||||
0x51EDDF15, 0x69FCB0B9, 0x21CF004D, 0x19DE6FE1, |
||||
0xB1A861A5, 0x89B90E09, 0xC18ABEFD, 0xF99BD151, |
||||
0x37516AAE, 0x0F400502, 0x4773B5F6, 0x7F62DA5A, |
||||
0xD714D41E, 0xEF05BBB2, 0xA7360B46, 0x9F2764EA, |
||||
0xF236613F, 0xCA270E93, 0x8214BE67, 0xBA05D1CB, |
||||
0x1273DF8F, 0x2A62B023, 0x625100D7, 0x5A406F7B, |
||||
0xB8730B7D, 0x806264D1, 0xC851D425, 0xF040BB89, |
||||
0x5836B5CD, 0x6027DA61, 0x28146A95, 0x10050539, |
||||
0x7D1400EC, 0x45056F40, 0x0D36DFB4, 0x3527B018, |
||||
0x9D51BE5C, 0xA540D1F0, 0xED736104, 0xD5620EA8, |
||||
0x2CF9DFF9, 0x14E8B055, 0x5CDB00A1, 0x64CA6F0D, |
||||
0xCCBC6149, 0xF4AD0EE5, 0xBC9EBE11, 0x848FD1BD, |
||||
0xE99ED468, 0xD18FBBC4, 0x99BC0B30, 0xA1AD649C, |
||||
0x09DB6AD8, 0x31CA0574, 0x79F9B580, 0x41E8DA2C, |
||||
0xA3DBBE2A, 0x9BCAD186, 0xD3F96172, 0xEBE80EDE, |
||||
0x439E009A, 0x7B8F6F36, 0x33BCDFC2, 0x0BADB06E, |
||||
0x66BCB5BB, 0x5EADDA17, 0x169E6AE3, 0x2E8F054F, |
||||
0x86F90B0B, 0xBEE864A7, 0xF6DBD453, 0xCECABBFF, |
||||
0x6EA2D55C, 0x56B3BAF0, 0x1E800A04, 0x269165A8, |
||||
0x8EE76BEC, 0xB6F60440, 0xFEC5B4B4, 0xC6D4DB18, |
||||
0xABC5DECD, 0x93D4B161, 0xDBE70195, 0xE3F66E39, |
||||
0x4B80607D, 0x73910FD1, 0x3BA2BF25, 0x03B3D089, |
||||
0xE180B48F, 0xD991DB23, 0x91A26BD7, 0xA9B3047B, |
||||
0x01C50A3F, 0x39D46593, 0x71E7D567, 0x49F6BACB, |
||||
0x24E7BF1E, 0x1CF6D0B2, 0x54C56046, 0x6CD40FEA, |
||||
0xC4A201AE, 0xFCB36E02, 0xB480DEF6, 0x8C91B15A, |
||||
0x750A600B, 0x4D1B0FA7, 0x0528BF53, 0x3D39D0FF, |
||||
0x954FDEBB, 0xAD5EB117, 0xE56D01E3, 0xDD7C6E4F, |
||||
0xB06D6B9A, 0x887C0436, 0xC04FB4C2, 0xF85EDB6E, |
||||
0x5028D52A, 0x6839BA86, 0x200A0A72, 0x181B65DE, |
||||
0xFA2801D8, 0xC2396E74, 0x8A0ADE80, 0xB21BB12C, |
||||
0x1A6DBF68, 0x227CD0C4, 0x6A4F6030, 0x525E0F9C, |
||||
0x3F4F0A49, 0x075E65E5, 0x4F6DD511, 0x777CBABD, |
||||
0xDF0AB4F9, 0xE71BDB55, 0xAF286BA1, 0x9739040D, |
||||
0x59F3BFF2, 0x61E2D05E, 0x29D160AA, 0x11C00F06, |
||||
0xB9B60142, 0x81A76EEE, 0xC994DE1A, 0xF185B1B6, |
||||
0x9C94B463, 0xA485DBCF, 0xECB66B3B, 0xD4A70497, |
||||
0x7CD10AD3, 0x44C0657F, 0x0CF3D58B, 0x34E2BA27, |
||||
0xD6D1DE21, 0xEEC0B18D, 0xA6F30179, 0x9EE26ED5, |
||||
0x36946091, 0x0E850F3D, 0x46B6BFC9, 0x7EA7D065, |
||||
0x13B6D5B0, 0x2BA7BA1C, 0x63940AE8, 0x5B856544, |
||||
0xF3F36B00, 0xCBE204AC, 0x83D1B458, 0xBBC0DBF4, |
||||
0x425B0AA5, 0x7A4A6509, 0x3279D5FD, 0x0A68BA51, |
||||
0xA21EB415, 0x9A0FDBB9, 0xD23C6B4D, 0xEA2D04E1, |
||||
0x873C0134, 0xBF2D6E98, 0xF71EDE6C, 0xCF0FB1C0, |
||||
0x6779BF84, 0x5F68D028, 0x175B60DC, 0x2F4A0F70, |
||||
0xCD796B76, 0xF56804DA, 0xBD5BB42E, 0x854ADB82, |
||||
0x2D3CD5C6, 0x152DBA6A, 0x5D1E0A9E, 0x650F6532, |
||||
0x081E60E7, 0x300F0F4B, 0x783CBFBF, 0x402DD013, |
||||
0xE85BDE57, 0xD04AB1FB, 0x9879010F, 0xA0686EA3, |
||||
/* T8_5 */ |
||||
0x00000000, 0xEF306B19, 0xDB8CA0C3, 0x34BCCBDA, |
||||
0xB2F53777, 0x5DC55C6E, 0x697997B4, 0x8649FCAD, |
||||
0x6006181F, 0x8F367306, 0xBB8AB8DC, 0x54BAD3C5, |
||||
0xD2F32F68, 0x3DC34471, 0x097F8FAB, 0xE64FE4B2, |
||||
0xC00C303E, 0x2F3C5B27, 0x1B8090FD, 0xF4B0FBE4, |
||||
0x72F90749, 0x9DC96C50, 0xA975A78A, 0x4645CC93, |
||||
0xA00A2821, 0x4F3A4338, 0x7B8688E2, 0x94B6E3FB, |
||||
0x12FF1F56, 0xFDCF744F, 0xC973BF95, 0x2643D48C, |
||||
0x85F4168D, 0x6AC47D94, 0x5E78B64E, 0xB148DD57, |
||||
0x370121FA, 0xD8314AE3, 0xEC8D8139, 0x03BDEA20, |
||||
0xE5F20E92, 0x0AC2658B, 0x3E7EAE51, 0xD14EC548, |
||||
0x570739E5, 0xB83752FC, 0x8C8B9926, 0x63BBF23F, |
||||
0x45F826B3, 0xAAC84DAA, 0x9E748670, 0x7144ED69, |
||||
0xF70D11C4, 0x183D7ADD, 0x2C81B107, 0xC3B1DA1E, |
||||
0x25FE3EAC, 0xCACE55B5, 0xFE729E6F, 0x1142F576, |
||||
0x970B09DB, 0x783B62C2, 0x4C87A918, 0xA3B7C201, |
||||
0x0E045BEB, 0xE13430F2, 0xD588FB28, 0x3AB89031, |
||||
0xBCF16C9C, 0x53C10785, 0x677DCC5F, 0x884DA746, |
||||
0x6E0243F4, 0x813228ED, 0xB58EE337, 0x5ABE882E, |
||||
0xDCF77483, 0x33C71F9A, 0x077BD440, 0xE84BBF59, |
||||
0xCE086BD5, 0x213800CC, 0x1584CB16, 0xFAB4A00F, |
||||
0x7CFD5CA2, 0x93CD37BB, 0xA771FC61, 0x48419778, |
||||
0xAE0E73CA, 0x413E18D3, 0x7582D309, 0x9AB2B810, |
||||
0x1CFB44BD, 0xF3CB2FA4, 0xC777E47E, 0x28478F67, |
||||
0x8BF04D66, 0x64C0267F, 0x507CEDA5, 0xBF4C86BC, |
||||
0x39057A11, 0xD6351108, 0xE289DAD2, 0x0DB9B1CB, |
||||
0xEBF65579, 0x04C63E60, 0x307AF5BA, 0xDF4A9EA3, |
||||
0x5903620E, 0xB6330917, 0x828FC2CD, 0x6DBFA9D4, |
||||
0x4BFC7D58, 0xA4CC1641, 0x9070DD9B, 0x7F40B682, |
||||
0xF9094A2F, 0x16392136, 0x2285EAEC, 0xCDB581F5, |
||||
0x2BFA6547, 0xC4CA0E5E, 0xF076C584, 0x1F46AE9D, |
||||
0x990F5230, 0x763F3929, 0x4283F2F3, 0xADB399EA, |
||||
0x1C08B7D6, 0xF338DCCF, 0xC7841715, 0x28B47C0C, |
||||
0xAEFD80A1, 0x41CDEBB8, 0x75712062, 0x9A414B7B, |
||||
0x7C0EAFC9, 0x933EC4D0, 0xA7820F0A, 0x48B26413, |
||||
0xCEFB98BE, 0x21CBF3A7, 0x1577387D, 0xFA475364, |
||||
0xDC0487E8, 0x3334ECF1, 0x0788272B, 0xE8B84C32, |
||||
0x6EF1B09F, 0x81C1DB86, 0xB57D105C, 0x5A4D7B45, |
||||
0xBC029FF7, 0x5332F4EE, 0x678E3F34, 0x88BE542D, |
||||
0x0EF7A880, 0xE1C7C399, 0xD57B0843, 0x3A4B635A, |
||||
0x99FCA15B, 0x76CCCA42, 0x42700198, 0xAD406A81, |
||||
0x2B09962C, 0xC439FD35, 0xF08536EF, 0x1FB55DF6, |
||||
0xF9FAB944, 0x16CAD25D, 0x22761987, 0xCD46729E, |
||||
0x4B0F8E33, 0xA43FE52A, 0x90832EF0, 0x7FB345E9, |
||||
0x59F09165, 0xB6C0FA7C, 0x827C31A6, 0x6D4C5ABF, |
||||
0xEB05A612, 0x0435CD0B, 0x308906D1, 0xDFB96DC8, |
||||
0x39F6897A, 0xD6C6E263, 0xE27A29B9, 0x0D4A42A0, |
||||
0x8B03BE0D, 0x6433D514, 0x508F1ECE, 0xBFBF75D7, |
||||
0x120CEC3D, 0xFD3C8724, 0xC9804CFE, 0x26B027E7, |
||||
0xA0F9DB4A, 0x4FC9B053, 0x7B757B89, 0x94451090, |
||||
0x720AF422, 0x9D3A9F3B, 0xA98654E1, 0x46B63FF8, |
||||
0xC0FFC355, 0x2FCFA84C, 0x1B736396, 0xF443088F, |
||||
0xD200DC03, 0x3D30B71A, 0x098C7CC0, 0xE6BC17D9, |
||||
0x60F5EB74, 0x8FC5806D, 0xBB794BB7, 0x544920AE, |
||||
0xB206C41C, 0x5D36AF05, 0x698A64DF, 0x86BA0FC6, |
||||
0x00F3F36B, 0xEFC39872, 0xDB7F53A8, 0x344F38B1, |
||||
0x97F8FAB0, 0x78C891A9, 0x4C745A73, 0xA344316A, |
||||
0x250DCDC7, 0xCA3DA6DE, 0xFE816D04, 0x11B1061D, |
||||
0xF7FEE2AF, 0x18CE89B6, 0x2C72426C, 0xC3422975, |
||||
0x450BD5D8, 0xAA3BBEC1, 0x9E87751B, 0x71B71E02, |
||||
0x57F4CA8E, 0xB8C4A197, 0x8C786A4D, 0x63480154, |
||||
0xE501FDF9, 0x0A3196E0, 0x3E8D5D3A, 0xD1BD3623, |
||||
0x37F2D291, 0xD8C2B988, 0xEC7E7252, 0x034E194B, |
||||
0x8507E5E6, 0x6A378EFF, 0x5E8B4525, 0xB1BB2E3C, |
||||
/* T8_6 */ |
||||
0x00000000, 0x68032CC8, 0xD0065990, 0xB8057558, |
||||
0xA5E0C5D1, 0xCDE3E919, 0x75E69C41, 0x1DE5B089, |
||||
0x4E2DFD53, 0x262ED19B, 0x9E2BA4C3, 0xF628880B, |
||||
0xEBCD3882, 0x83CE144A, 0x3BCB6112, 0x53C84DDA, |
||||
0x9C5BFAA6, 0xF458D66E, 0x4C5DA336, 0x245E8FFE, |
||||
0x39BB3F77, 0x51B813BF, 0xE9BD66E7, 0x81BE4A2F, |
||||
0xD27607F5, 0xBA752B3D, 0x02705E65, 0x6A7372AD, |
||||
0x7796C224, 0x1F95EEEC, 0xA7909BB4, 0xCF93B77C, |
||||
0x3D5B83BD, 0x5558AF75, 0xED5DDA2D, 0x855EF6E5, |
||||
0x98BB466C, 0xF0B86AA4, 0x48BD1FFC, 0x20BE3334, |
||||
0x73767EEE, 0x1B755226, 0xA370277E, 0xCB730BB6, |
||||
0xD696BB3F, 0xBE9597F7, 0x0690E2AF, 0x6E93CE67, |
||||
0xA100791B, 0xC90355D3, 0x7106208B, 0x19050C43, |
||||
0x04E0BCCA, 0x6CE39002, 0xD4E6E55A, 0xBCE5C992, |
||||
0xEF2D8448, 0x872EA880, 0x3F2BDDD8, 0x5728F110, |
||||
0x4ACD4199, 0x22CE6D51, 0x9ACB1809, 0xF2C834C1, |
||||
0x7AB7077A, 0x12B42BB2, 0xAAB15EEA, 0xC2B27222, |
||||
0xDF57C2AB, 0xB754EE63, 0x0F519B3B, 0x6752B7F3, |
||||
0x349AFA29, 0x5C99D6E1, 0xE49CA3B9, 0x8C9F8F71, |
||||
0x917A3FF8, 0xF9791330, 0x417C6668, 0x297F4AA0, |
||||
0xE6ECFDDC, 0x8EEFD114, 0x36EAA44C, 0x5EE98884, |
||||
0x430C380D, 0x2B0F14C5, 0x930A619D, 0xFB094D55, |
||||
0xA8C1008F, 0xC0C22C47, 0x78C7591F, 0x10C475D7, |
||||
0x0D21C55E, 0x6522E996, 0xDD279CCE, 0xB524B006, |
||||
0x47EC84C7, 0x2FEFA80F, 0x97EADD57, 0xFFE9F19F, |
||||
0xE20C4116, 0x8A0F6DDE, 0x320A1886, 0x5A09344E, |
||||
0x09C17994, 0x61C2555C, 0xD9C72004, 0xB1C40CCC, |
||||
0xAC21BC45, 0xC422908D, 0x7C27E5D5, 0x1424C91D, |
||||
0xDBB77E61, 0xB3B452A9, 0x0BB127F1, 0x63B20B39, |
||||
0x7E57BBB0, 0x16549778, 0xAE51E220, 0xC652CEE8, |
||||
0x959A8332, 0xFD99AFFA, 0x459CDAA2, 0x2D9FF66A, |
||||
0x307A46E3, 0x58796A2B, 0xE07C1F73, 0x887F33BB, |
||||
0xF56E0EF4, 0x9D6D223C, 0x25685764, 0x4D6B7BAC, |
||||
0x508ECB25, 0x388DE7ED, 0x808892B5, 0xE88BBE7D, |
||||
0xBB43F3A7, 0xD340DF6F, 0x6B45AA37, 0x034686FF, |
||||
0x1EA33676, 0x76A01ABE, 0xCEA56FE6, 0xA6A6432E, |
||||
0x6935F452, 0x0136D89A, 0xB933ADC2, 0xD130810A, |
||||
0xCCD53183, 0xA4D61D4B, 0x1CD36813, 0x74D044DB, |
||||
0x27180901, 0x4F1B25C9, 0xF71E5091, 0x9F1D7C59, |
||||
0x82F8CCD0, 0xEAFBE018, 0x52FE9540, 0x3AFDB988, |
||||
0xC8358D49, 0xA036A181, 0x1833D4D9, 0x7030F811, |
||||
0x6DD54898, 0x05D66450, 0xBDD31108, 0xD5D03DC0, |
||||
0x8618701A, 0xEE1B5CD2, 0x561E298A, 0x3E1D0542, |
||||
0x23F8B5CB, 0x4BFB9903, 0xF3FEEC5B, 0x9BFDC093, |
||||
0x546E77EF, 0x3C6D5B27, 0x84682E7F, 0xEC6B02B7, |
||||
0xF18EB23E, 0x998D9EF6, 0x2188EBAE, 0x498BC766, |
||||
0x1A438ABC, 0x7240A674, 0xCA45D32C, 0xA246FFE4, |
||||
0xBFA34F6D, 0xD7A063A5, 0x6FA516FD, 0x07A63A35, |
||||
0x8FD9098E, 0xE7DA2546, 0x5FDF501E, 0x37DC7CD6, |
||||
0x2A39CC5F, 0x423AE097, 0xFA3F95CF, 0x923CB907, |
||||
0xC1F4F4DD, 0xA9F7D815, 0x11F2AD4D, 0x79F18185, |
||||
0x6414310C, 0x0C171DC4, 0xB412689C, 0xDC114454, |
||||
0x1382F328, 0x7B81DFE0, 0xC384AAB8, 0xAB878670, |
||||
0xB66236F9, 0xDE611A31, 0x66646F69, 0x0E6743A1, |
||||
0x5DAF0E7B, 0x35AC22B3, 0x8DA957EB, 0xE5AA7B23, |
||||
0xF84FCBAA, 0x904CE762, 0x2849923A, 0x404ABEF2, |
||||
0xB2828A33, 0xDA81A6FB, 0x6284D3A3, 0x0A87FF6B, |
||||
0x17624FE2, 0x7F61632A, 0xC7641672, 0xAF673ABA, |
||||
0xFCAF7760, 0x94AC5BA8, 0x2CA92EF0, 0x44AA0238, |
||||
0x594FB2B1, 0x314C9E79, 0x8949EB21, 0xE14AC7E9, |
||||
0x2ED97095, 0x46DA5C5D, 0xFEDF2905, 0x96DC05CD, |
||||
0x8B39B544, 0xE33A998C, 0x5B3FECD4, 0x333CC01C, |
||||
0x60F48DC6, 0x08F7A10E, 0xB0F2D456, 0xD8F1F89E, |
||||
0xC5144817, 0xAD1764DF, 0x15121187, 0x7D113D4F, |
||||
/* T8_7 */ |
||||
0x00000000, 0x493C7D27, 0x9278FA4E, 0xDB448769, |
||||
0x211D826D, 0x6821FF4A, 0xB3657823, 0xFA590504, |
||||
0x423B04DA, 0x0B0779FD, 0xD043FE94, 0x997F83B3, |
||||
0x632686B7, 0x2A1AFB90, 0xF15E7CF9, 0xB86201DE, |
||||
0x847609B4, 0xCD4A7493, 0x160EF3FA, 0x5F328EDD, |
||||
0xA56B8BD9, 0xEC57F6FE, 0x37137197, 0x7E2F0CB0, |
||||
0xC64D0D6E, 0x8F717049, 0x5435F720, 0x1D098A07, |
||||
0xE7508F03, 0xAE6CF224, 0x7528754D, 0x3C14086A, |
||||
0x0D006599, 0x443C18BE, 0x9F789FD7, 0xD644E2F0, |
||||
0x2C1DE7F4, 0x65219AD3, 0xBE651DBA, 0xF759609D, |
||||
0x4F3B6143, 0x06071C64, 0xDD439B0D, 0x947FE62A, |
||||
0x6E26E32E, 0x271A9E09, 0xFC5E1960, 0xB5626447, |
||||
0x89766C2D, 0xC04A110A, 0x1B0E9663, 0x5232EB44, |
||||
0xA86BEE40, 0xE1579367, 0x3A13140E, 0x732F6929, |
||||
0xCB4D68F7, 0x827115D0, 0x593592B9, 0x1009EF9E, |
||||
0xEA50EA9A, 0xA36C97BD, 0x782810D4, 0x31146DF3, |
||||
0x1A00CB32, 0x533CB615, 0x8878317C, 0xC1444C5B, |
||||
0x3B1D495F, 0x72213478, 0xA965B311, 0xE059CE36, |
||||
0x583BCFE8, 0x1107B2CF, 0xCA4335A6, 0x837F4881, |
||||
0x79264D85, 0x301A30A2, 0xEB5EB7CB, 0xA262CAEC, |
||||
0x9E76C286, 0xD74ABFA1, 0x0C0E38C8, 0x453245EF, |
||||
0xBF6B40EB, 0xF6573DCC, 0x2D13BAA5, 0x642FC782, |
||||
0xDC4DC65C, 0x9571BB7B, 0x4E353C12, 0x07094135, |
||||
0xFD504431, 0xB46C3916, 0x6F28BE7F, 0x2614C358, |
||||
0x1700AEAB, 0x5E3CD38C, 0x857854E5, 0xCC4429C2, |
||||
0x361D2CC6, 0x7F2151E1, 0xA465D688, 0xED59ABAF, |
||||
0x553BAA71, 0x1C07D756, 0xC743503F, 0x8E7F2D18, |
||||
0x7426281C, 0x3D1A553B, 0xE65ED252, 0xAF62AF75, |
||||
0x9376A71F, 0xDA4ADA38, 0x010E5D51, 0x48322076, |
||||
0xB26B2572, 0xFB575855, 0x2013DF3C, 0x692FA21B, |
||||
0xD14DA3C5, 0x9871DEE2, 0x4335598B, 0x0A0924AC, |
||||
0xF05021A8, 0xB96C5C8F, 0x6228DBE6, 0x2B14A6C1, |
||||
0x34019664, 0x7D3DEB43, 0xA6796C2A, 0xEF45110D, |
||||
0x151C1409, 0x5C20692E, 0x8764EE47, 0xCE589360, |
||||
0x763A92BE, 0x3F06EF99, 0xE44268F0, 0xAD7E15D7, |
||||
0x572710D3, 0x1E1B6DF4, 0xC55FEA9D, 0x8C6397BA, |
||||
0xB0779FD0, 0xF94BE2F7, 0x220F659E, 0x6B3318B9, |
||||
0x916A1DBD, 0xD856609A, 0x0312E7F3, 0x4A2E9AD4, |
||||
0xF24C9B0A, 0xBB70E62D, 0x60346144, 0x29081C63, |
||||
0xD3511967, 0x9A6D6440, 0x4129E329, 0x08159E0E, |
||||
0x3901F3FD, 0x703D8EDA, 0xAB7909B3, 0xE2457494, |
||||
0x181C7190, 0x51200CB7, 0x8A648BDE, 0xC358F6F9, |
||||
0x7B3AF727, 0x32068A00, 0xE9420D69, 0xA07E704E, |
||||
0x5A27754A, 0x131B086D, 0xC85F8F04, 0x8163F223, |
||||
0xBD77FA49, 0xF44B876E, 0x2F0F0007, 0x66337D20, |
||||
0x9C6A7824, 0xD5560503, 0x0E12826A, 0x472EFF4D, |
||||
0xFF4CFE93, 0xB67083B4, 0x6D3404DD, 0x240879FA, |
||||
0xDE517CFE, 0x976D01D9, 0x4C2986B0, 0x0515FB97, |
||||
0x2E015D56, 0x673D2071, 0xBC79A718, 0xF545DA3F, |
||||
0x0F1CDF3B, 0x4620A21C, 0x9D642575, 0xD4585852, |
||||
0x6C3A598C, 0x250624AB, 0xFE42A3C2, 0xB77EDEE5, |
||||
0x4D27DBE1, 0x041BA6C6, 0xDF5F21AF, 0x96635C88, |
||||
0xAA7754E2, 0xE34B29C5, 0x380FAEAC, 0x7133D38B, |
||||
0x8B6AD68F, 0xC256ABA8, 0x19122CC1, 0x502E51E6, |
||||
0xE84C5038, 0xA1702D1F, 0x7A34AA76, 0x3308D751, |
||||
0xC951D255, 0x806DAF72, 0x5B29281B, 0x1215553C, |
||||
0x230138CF, 0x6A3D45E8, 0xB179C281, 0xF845BFA6, |
||||
0x021CBAA2, 0x4B20C785, 0x906440EC, 0xD9583DCB, |
||||
0x613A3C15, 0x28064132, 0xF342C65B, 0xBA7EBB7C, |
||||
0x4027BE78, 0x091BC35F, 0xD25F4436, 0x9B633911, |
||||
0xA777317B, 0xEE4B4C5C, 0x350FCB35, 0x7C33B612, |
||||
0x866AB316, 0xCF56CE31, 0x14124958, 0x5D2E347F, |
||||
0xE54C35A1, 0xAC704886, 0x7734CFEF, 0x3E08B2C8, |
||||
0xC451B7CC, 0x8D6DCAEB, 0x56294D82, 0x1F1530A5 |
||||
}; |
||||
} |
@ -0,0 +1,545 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
package com.fr.third.org.apache.commons.codec.digest; |
||||
|
||||
import java.security.MessageDigest; |
||||
import java.security.NoSuchAlgorithmException; |
||||
import java.util.Arrays; |
||||
import java.util.regex.Matcher; |
||||
import java.util.regex.Pattern; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.Charsets; |
||||
|
||||
/** |
||||
* SHA2-based Unix crypt implementation. |
||||
* <p> |
||||
* Based on the C implementation released into the Public Domain by Ulrich Drepper <drepper@redhat.com> |
||||
* http://www.akkadia.org/drepper/SHA-crypt.txt
|
||||
* <p> |
||||
* Conversion to Kotlin and from there to Java in 2012 by Christian Hammers <ch@lathspell.de> and likewise put |
||||
* into the Public Domain. |
||||
* <p> |
||||
* This class is immutable and thread-safe. |
||||
* |
||||
* @version $Id: Sha2Crypt.java 1744746 2016-05-20 14:19:43Z sebb $ |
||||
* @since 1.7 |
||||
*/ |
||||
public class Sha2Crypt { |
||||
|
||||
/** Default number of rounds if not explicitly specified. */ |
||||
private static final int ROUNDS_DEFAULT = 5000; |
||||
|
||||
/** Maximum number of rounds. */ |
||||
private static final int ROUNDS_MAX = 999999999; |
||||
|
||||
/** Minimum number of rounds. */ |
||||
private static final int ROUNDS_MIN = 1000; |
||||
|
||||
/** Prefix for optional rounds specification. */ |
||||
private static final String ROUNDS_PREFIX = "rounds="; |
||||
|
||||
/** The number of bytes the final hash value will have (SHA-256 variant). */ |
||||
private static final int SHA256_BLOCKSIZE = 32; |
||||
|
||||
/** The prefixes that can be used to identify this crypt() variant (SHA-256). */ |
||||
static final String SHA256_PREFIX = "$5$"; |
||||
|
||||
/** The number of bytes the final hash value will have (SHA-512 variant). */ |
||||
private static final int SHA512_BLOCKSIZE = 64; |
||||
|
||||
/** The prefixes that can be used to identify this crypt() variant (SHA-512). */ |
||||
static final String SHA512_PREFIX = "$6$"; |
||||
|
||||
/** The pattern to match valid salt values. */ |
||||
private static final Pattern SALT_PATTERN = Pattern |
||||
.compile("^\\$([56])\\$(rounds=(\\d+)\\$)?([\\.\\/a-zA-Z0-9]{1,16}).*"); |
||||
|
||||
/** |
||||
* Generates a libc crypt() compatible "$5$" hash value with random salt. |
||||
* <p> |
||||
* See {@link Crypt#crypt(String, String)} for details. |
||||
* |
||||
* @param keyBytes |
||||
* plaintext to hash |
||||
* @return complete hash value |
||||
* @throws RuntimeException |
||||
* when a {@link NoSuchAlgorithmException} is caught. |
||||
*/ |
||||
public static String sha256Crypt(final byte[] keyBytes) { |
||||
return sha256Crypt(keyBytes, null); |
||||
} |
||||
|
||||
/** |
||||
* Generates a libc6 crypt() compatible "$5$" hash value. |
||||
* <p> |
||||
* See {@link Crypt#crypt(String, String)} for details. |
||||
* |
||||
* @param keyBytes |
||||
* plaintext to hash |
||||
* @param salt |
||||
* real salt value without prefix or "rounds=" |
||||
* @return complete hash value including salt |
||||
* @throws IllegalArgumentException |
||||
* if the salt does not match the allowed pattern |
||||
* @throws RuntimeException |
||||
* when a {@link NoSuchAlgorithmException} is caught. |
||||
*/ |
||||
public static String sha256Crypt(final byte[] keyBytes, String salt) { |
||||
if (salt == null) { |
||||
salt = SHA256_PREFIX + B64.getRandomSalt(8); |
||||
} |
||||
return sha2Crypt(keyBytes, salt, SHA256_PREFIX, SHA256_BLOCKSIZE, MessageDigestAlgorithms.SHA_256); |
||||
} |
||||
|
||||
/** |
||||
* Generates a libc6 crypt() compatible "$5$" or "$6$" SHA2 based hash value. |
||||
* <p> |
||||
* This is a nearly line by line conversion of the original C function. The numbered comments are from the algorithm |
||||
* description, the short C-style ones from the original C code and the ones with "Remark" from me. |
||||
* <p> |
||||
* See {@link Crypt#crypt(String, String)} for details. |
||||
* |
||||
* @param keyBytes |
||||
* plaintext to hash |
||||
* @param salt |
||||
* real salt value without prefix or "rounds=" |
||||
* @param saltPrefix |
||||
* either $5$ or $6$ |
||||
* @param blocksize |
||||
* a value that differs between $5$ and $6$ |
||||
* @param algorithm |
||||
* {@link MessageDigest} algorithm identifier string |
||||
* @return complete hash value including prefix and salt |
||||
* @throws IllegalArgumentException |
||||
* if the given salt is <code>null</code> or does not match the allowed pattern |
||||
* @throws IllegalArgumentException |
||||
* when a {@link NoSuchAlgorithmException} is caught |
||||
* @see MessageDigestAlgorithms |
||||
*/ |
||||
private static String sha2Crypt(final byte[] keyBytes, final String salt, final String saltPrefix, |
||||
final int blocksize, final String algorithm) { |
||||
|
||||
final int keyLen = keyBytes.length; |
||||
|
||||
// Extracts effective salt and the number of rounds from the given salt.
|
||||
int rounds = ROUNDS_DEFAULT; |
||||
boolean roundsCustom = false; |
||||
if (salt == null) { |
||||
throw new IllegalArgumentException("Salt must not be null"); |
||||
} |
||||
|
||||
final Matcher m = SALT_PATTERN.matcher(salt); |
||||
if (!m.find()) { |
||||
throw new IllegalArgumentException("Invalid salt value: " + salt); |
||||
} |
||||
if (m.group(3) != null) { |
||||
rounds = Integer.parseInt(m.group(3)); |
||||
rounds = Math.max(ROUNDS_MIN, Math.min(ROUNDS_MAX, rounds)); |
||||
roundsCustom = true; |
||||
} |
||||
final String saltString = m.group(4); |
||||
final byte[] saltBytes = saltString.getBytes(Charsets.UTF_8); |
||||
final int saltLen = saltBytes.length; |
||||
|
||||
// 1. start digest A
|
||||
// Prepare for the real work.
|
||||
MessageDigest ctx = DigestUtils.getDigest(algorithm); |
||||
|
||||
// 2. the password string is added to digest A
|
||||
/* |
||||
* Add the key string. |
||||
*/ |
||||
ctx.update(keyBytes); |
||||
|
||||
// 3. the salt string is added to digest A. This is just the salt string
|
||||
// itself without the enclosing '$', without the magic salt_prefix $5$ and
|
||||
// $6$ respectively and without the rounds=<N> specification.
|
||||
//
|
||||
// NB: the MD5 algorithm did add the $1$ salt_prefix. This is not deemed
|
||||
// necessary since it is a constant string and does not add security
|
||||
// and /possibly/ allows a plain text attack. Since the rounds=<N>
|
||||
// specification should never be added this would also create an
|
||||
// inconsistency.
|
||||
/* |
||||
* The last part is the salt string. This must be at most 16 characters and it ends at the first `$' character |
||||
* (for compatibility with existing implementations). |
||||
*/ |
||||
ctx.update(saltBytes); |
||||
|
||||
// 4. start digest B
|
||||
/* |
||||
* Compute alternate sha512 sum with input KEY, SALT, and KEY. The final result will be added to the first |
||||
* context. |
||||
*/ |
||||
MessageDigest altCtx = DigestUtils.getDigest(algorithm); |
||||
|
||||
// 5. add the password to digest B
|
||||
/* |
||||
* Add key. |
||||
*/ |
||||
altCtx.update(keyBytes); |
||||
|
||||
// 6. add the salt string to digest B
|
||||
/* |
||||
* Add salt. |
||||
*/ |
||||
altCtx.update(saltBytes); |
||||
|
||||
// 7. add the password again to digest B
|
||||
/* |
||||
* Add key again. |
||||
*/ |
||||
altCtx.update(keyBytes); |
||||
|
||||
// 8. finish digest B
|
||||
/* |
||||
* Now get result of this (32 bytes) and add it to the other context. |
||||
*/ |
||||
byte[] altResult = altCtx.digest(); |
||||
|
||||
// 9. For each block of 32 or 64 bytes in the password string (excluding
|
||||
// the terminating NUL in the C representation), add digest B to digest A
|
||||
/* |
||||
* Add for any character in the key one byte of the alternate sum. |
||||
*/ |
||||
/* |
||||
* (Remark: the C code comment seems wrong for key length > 32!) |
||||
*/ |
||||
int cnt = keyBytes.length; |
||||
while (cnt > blocksize) { |
||||
ctx.update(altResult, 0, blocksize); |
||||
cnt -= blocksize; |
||||
} |
||||
|
||||
// 10. For the remaining N bytes of the password string add the first
|
||||
// N bytes of digest B to digest A
|
||||
ctx.update(altResult, 0, cnt); |
||||
|
||||
// 11. For each bit of the binary representation of the length of the
|
||||
// password string up to and including the highest 1-digit, starting
|
||||
// from to lowest bit position (numeric value 1):
|
||||
//
|
||||
// a) for a 1-digit add digest B to digest A
|
||||
//
|
||||
// b) for a 0-digit add the password string
|
||||
//
|
||||
// NB: this step differs significantly from the MD5 algorithm. It
|
||||
// adds more randomness.
|
||||
/* |
||||
* Take the binary representation of the length of the key and for every 1 add the alternate sum, for every 0 |
||||
* the key. |
||||
*/ |
||||
cnt = keyBytes.length; |
||||
while (cnt > 0) { |
||||
if ((cnt & 1) != 0) { |
||||
ctx.update(altResult, 0, blocksize); |
||||
} else { |
||||
ctx.update(keyBytes); |
||||
} |
||||
cnt >>= 1; |
||||
} |
||||
|
||||
// 12. finish digest A
|
||||
/* |
||||
* Create intermediate result. |
||||
*/ |
||||
altResult = ctx.digest(); |
||||
|
||||
// 13. start digest DP
|
||||
/* |
||||
* Start computation of P byte sequence. |
||||
*/ |
||||
altCtx = DigestUtils.getDigest(algorithm); |
||||
|
||||
// 14. for every byte in the password (excluding the terminating NUL byte
|
||||
// in the C representation of the string)
|
||||
//
|
||||
// add the password to digest DP
|
||||
/* |
||||
* For every character in the password add the entire password. |
||||
*/ |
||||
for (int i = 1; i <= keyLen; i++) { |
||||
altCtx.update(keyBytes); |
||||
} |
||||
|
||||
// 15. finish digest DP
|
||||
/* |
||||
* Finish the digest. |
||||
*/ |
||||
byte[] tempResult = altCtx.digest(); |
||||
|
||||
// 16. produce byte sequence P of the same length as the password where
|
||||
//
|
||||
// a) for each block of 32 or 64 bytes of length of the password string
|
||||
// the entire digest DP is used
|
||||
//
|
||||
// b) for the remaining N (up to 31 or 63) bytes use the first N
|
||||
// bytes of digest DP
|
||||
/* |
||||
* Create byte sequence P. |
||||
*/ |
||||
final byte[] pBytes = new byte[keyLen]; |
||||
int cp = 0; |
||||
while (cp < keyLen - blocksize) { |
||||
System.arraycopy(tempResult, 0, pBytes, cp, blocksize); |
||||
cp += blocksize; |
||||
} |
||||
System.arraycopy(tempResult, 0, pBytes, cp, keyLen - cp); |
||||
|
||||
// 17. start digest DS
|
||||
/* |
||||
* Start computation of S byte sequence. |
||||
*/ |
||||
altCtx = DigestUtils.getDigest(algorithm); |
||||
|
||||
// 18. repeast the following 16+A[0] times, where A[0] represents the first
|
||||
// byte in digest A interpreted as an 8-bit unsigned value
|
||||
//
|
||||
// add the salt to digest DS
|
||||
/* |
||||
* For every character in the password add the entire password. |
||||
*/ |
||||
for (int i = 1; i <= 16 + (altResult[0] & 0xff); i++) { |
||||
altCtx.update(saltBytes); |
||||
} |
||||
|
||||
// 19. finish digest DS
|
||||
/* |
||||
* Finish the digest. |
||||
*/ |
||||
tempResult = altCtx.digest(); |
||||
|
||||
// 20. produce byte sequence S of the same length as the salt string where
|
||||
//
|
||||
// a) for each block of 32 or 64 bytes of length of the salt string
|
||||
// the entire digest DS is used
|
||||
//
|
||||
// b) for the remaining N (up to 31 or 63) bytes use the first N
|
||||
// bytes of digest DS
|
||||
/* |
||||
* Create byte sequence S. |
||||
*/ |
||||
// Remark: The salt is limited to 16 chars, how does this make sense?
|
||||
final byte[] sBytes = new byte[saltLen]; |
||||
cp = 0; |
||||
while (cp < saltLen - blocksize) { |
||||
System.arraycopy(tempResult, 0, sBytes, cp, blocksize); |
||||
cp += blocksize; |
||||
} |
||||
System.arraycopy(tempResult, 0, sBytes, cp, saltLen - cp); |
||||
|
||||
// 21. repeat a loop according to the number specified in the rounds=<N>
|
||||
// specification in the salt (or the default value if none is
|
||||
// present). Each round is numbered, starting with 0 and up to N-1.
|
||||
//
|
||||
// The loop uses a digest as input. In the first round it is the
|
||||
// digest produced in step 12. In the latter steps it is the digest
|
||||
// produced in step 21.h. The following text uses the notation
|
||||
// "digest A/C" to describe this behavior.
|
||||
/* |
||||
* Repeatedly run the collected hash value through sha512 to burn CPU cycles. |
||||
*/ |
||||
for (int i = 0; i <= rounds - 1; i++) { |
||||
// a) start digest C
|
||||
/* |
||||
* New context. |
||||
*/ |
||||
ctx = DigestUtils.getDigest(algorithm); |
||||
|
||||
// b) for odd round numbers add the byte sequense P to digest C
|
||||
// c) for even round numbers add digest A/C
|
||||
/* |
||||
* Add key or last result. |
||||
*/ |
||||
if ((i & 1) != 0) { |
||||
ctx.update(pBytes, 0, keyLen); |
||||
} else { |
||||
ctx.update(altResult, 0, blocksize); |
||||
} |
||||
|
||||
// d) for all round numbers not divisible by 3 add the byte sequence S
|
||||
/* |
||||
* Add salt for numbers not divisible by 3. |
||||
*/ |
||||
if (i % 3 != 0) { |
||||
ctx.update(sBytes, 0, saltLen); |
||||
} |
||||
|
||||
// e) for all round numbers not divisible by 7 add the byte sequence P
|
||||
/* |
||||
* Add key for numbers not divisible by 7. |
||||
*/ |
||||
if (i % 7 != 0) { |
||||
ctx.update(pBytes, 0, keyLen); |
||||
} |
||||
|
||||
// f) for odd round numbers add digest A/C
|
||||
// g) for even round numbers add the byte sequence P
|
||||
/* |
||||
* Add key or last result. |
||||
*/ |
||||
if ((i & 1) != 0) { |
||||
ctx.update(altResult, 0, blocksize); |
||||
} else { |
||||
ctx.update(pBytes, 0, keyLen); |
||||
} |
||||
|
||||
// h) finish digest C.
|
||||
/* |
||||
* Create intermediate result. |
||||
*/ |
||||
altResult = ctx.digest(); |
||||
} |
||||
|
||||
// 22. Produce the output string. This is an ASCII string of the maximum
|
||||
// size specified above, consisting of multiple pieces:
|
||||
//
|
||||
// a) the salt salt_prefix, $5$ or $6$ respectively
|
||||
//
|
||||
// b) the rounds=<N> specification, if one was present in the input
|
||||
// salt string. A trailing '$' is added in this case to separate
|
||||
// the rounds specification from the following text.
|
||||
//
|
||||
// c) the salt string truncated to 16 characters
|
||||
//
|
||||
// d) a '$' character
|
||||
/* |
||||
* Now we can construct the result string. It consists of three parts. |
||||
*/ |
||||
final StringBuilder buffer = new StringBuilder(saltPrefix); |
||||
if (roundsCustom) { |
||||
buffer.append(ROUNDS_PREFIX); |
||||
buffer.append(rounds); |
||||
buffer.append("$"); |
||||
} |
||||
buffer.append(saltString); |
||||
buffer.append("$"); |
||||
|
||||
// e) the base-64 encoded final C digest. The encoding used is as
|
||||
// follows:
|
||||
// [...]
|
||||
//
|
||||
// Each group of three bytes from the digest produces four
|
||||
// characters as output:
|
||||
//
|
||||
// 1. character: the six low bits of the first byte
|
||||
// 2. character: the two high bits of the first byte and the
|
||||
// four low bytes from the second byte
|
||||
// 3. character: the four high bytes from the second byte and
|
||||
// the two low bits from the third byte
|
||||
// 4. character: the six high bits from the third byte
|
||||
//
|
||||
// The groups of three bytes are as follows (in this sequence).
|
||||
// These are the indices into the byte array containing the
|
||||
// digest, starting with index 0. For the last group there are
|
||||
// not enough bytes left in the digest and the value zero is used
|
||||
// in its place. This group also produces only three or two
|
||||
// characters as output for SHA-512 and SHA-512 respectively.
|
||||
|
||||
// This was just a safeguard in the C implementation:
|
||||
// int buflen = salt_prefix.length() - 1 + ROUNDS_PREFIX.length() + 9 + 1 + salt_string.length() + 1 + 86 + 1;
|
||||
|
||||
if (blocksize == 32) { |
||||
B64.b64from24bit(altResult[0], altResult[10], altResult[20], 4, buffer); |
||||
B64.b64from24bit(altResult[21], altResult[1], altResult[11], 4, buffer); |
||||
B64.b64from24bit(altResult[12], altResult[22], altResult[2], 4, buffer); |
||||
B64.b64from24bit(altResult[3], altResult[13], altResult[23], 4, buffer); |
||||
B64.b64from24bit(altResult[24], altResult[4], altResult[14], 4, buffer); |
||||
B64.b64from24bit(altResult[15], altResult[25], altResult[5], 4, buffer); |
||||
B64.b64from24bit(altResult[6], altResult[16], altResult[26], 4, buffer); |
||||
B64.b64from24bit(altResult[27], altResult[7], altResult[17], 4, buffer); |
||||
B64.b64from24bit(altResult[18], altResult[28], altResult[8], 4, buffer); |
||||
B64.b64from24bit(altResult[9], altResult[19], altResult[29], 4, buffer); |
||||
B64.b64from24bit((byte) 0, altResult[31], altResult[30], 3, buffer); |
||||
} else { |
||||
B64.b64from24bit(altResult[0], altResult[21], altResult[42], 4, buffer); |
||||
B64.b64from24bit(altResult[22], altResult[43], altResult[1], 4, buffer); |
||||
B64.b64from24bit(altResult[44], altResult[2], altResult[23], 4, buffer); |
||||
B64.b64from24bit(altResult[3], altResult[24], altResult[45], 4, buffer); |
||||
B64.b64from24bit(altResult[25], altResult[46], altResult[4], 4, buffer); |
||||
B64.b64from24bit(altResult[47], altResult[5], altResult[26], 4, buffer); |
||||
B64.b64from24bit(altResult[6], altResult[27], altResult[48], 4, buffer); |
||||
B64.b64from24bit(altResult[28], altResult[49], altResult[7], 4, buffer); |
||||
B64.b64from24bit(altResult[50], altResult[8], altResult[29], 4, buffer); |
||||
B64.b64from24bit(altResult[9], altResult[30], altResult[51], 4, buffer); |
||||
B64.b64from24bit(altResult[31], altResult[52], altResult[10], 4, buffer); |
||||
B64.b64from24bit(altResult[53], altResult[11], altResult[32], 4, buffer); |
||||
B64.b64from24bit(altResult[12], altResult[33], altResult[54], 4, buffer); |
||||
B64.b64from24bit(altResult[34], altResult[55], altResult[13], 4, buffer); |
||||
B64.b64from24bit(altResult[56], altResult[14], altResult[35], 4, buffer); |
||||
B64.b64from24bit(altResult[15], altResult[36], altResult[57], 4, buffer); |
||||
B64.b64from24bit(altResult[37], altResult[58], altResult[16], 4, buffer); |
||||
B64.b64from24bit(altResult[59], altResult[17], altResult[38], 4, buffer); |
||||
B64.b64from24bit(altResult[18], altResult[39], altResult[60], 4, buffer); |
||||
B64.b64from24bit(altResult[40], altResult[61], altResult[19], 4, buffer); |
||||
B64.b64from24bit(altResult[62], altResult[20], altResult[41], 4, buffer); |
||||
B64.b64from24bit((byte) 0, (byte) 0, altResult[63], 2, buffer); |
||||
} |
||||
|
||||
/* |
||||
* Clear the buffer for the intermediate result so that people attaching to processes or reading core dumps |
||||
* cannot get any information. |
||||
*/ |
||||
// Is there a better way to do this with the JVM?
|
||||
Arrays.fill(tempResult, (byte) 0); |
||||
Arrays.fill(pBytes, (byte) 0); |
||||
Arrays.fill(sBytes, (byte) 0); |
||||
ctx.reset(); |
||||
altCtx.reset(); |
||||
Arrays.fill(keyBytes, (byte) 0); |
||||
Arrays.fill(saltBytes, (byte) 0); |
||||
|
||||
return buffer.toString(); |
||||
} |
||||
|
||||
/** |
||||
* Generates a libc crypt() compatible "$6$" hash value with random salt. |
||||
* <p> |
||||
* See {@link Crypt#crypt(String, String)} for details. |
||||
* |
||||
* @param keyBytes |
||||
* plaintext to hash |
||||
* @return complete hash value |
||||
* @throws RuntimeException |
||||
* when a {@link NoSuchAlgorithmException} is caught. |
||||
*/ |
||||
public static String sha512Crypt(final byte[] keyBytes) { |
||||
return sha512Crypt(keyBytes, null); |
||||
} |
||||
|
||||
/** |
||||
* Generates a libc6 crypt() compatible "$6$" hash value. |
||||
* <p> |
||||
* See {@link Crypt#crypt(String, String)} for details. |
||||
* |
||||
* @param keyBytes |
||||
* plaintext to hash |
||||
* @param salt |
||||
* real salt value without prefix or "rounds=" |
||||
* @return complete hash value including salt |
||||
* @throws IllegalArgumentException |
||||
* if the salt does not match the allowed pattern |
||||
* @throws RuntimeException |
||||
* when a {@link NoSuchAlgorithmException} is caught. |
||||
*/ |
||||
public static String sha512Crypt(final byte[] keyBytes, String salt) { |
||||
if (salt == null) { |
||||
salt = SHA512_PREFIX + B64.getRandomSalt(8); |
||||
} |
||||
return sha2Crypt(keyBytes, salt, SHA512_PREFIX, SHA512_BLOCKSIZE, MessageDigestAlgorithms.SHA_512); |
||||
} |
||||
} |
@ -0,0 +1,413 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
package com.fr.third.org.apache.commons.codec.digest; |
||||
|
||||
import java.util.Random; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.Charsets; |
||||
|
||||
/** |
||||
* Unix crypt(3) algorithm implementation. |
||||
* <p> |
||||
* This class only implements the traditional 56 bit DES based algorithm. Please use DigestUtils.crypt() for a method |
||||
* that distinguishes between all the algorithms supported in the current glibc's crypt(). |
||||
* <p> |
||||
* The Java implementation was taken from the JetSpeed Portal project (see |
||||
* org.apache.jetspeed.services.security.ldap.UnixCrypt). |
||||
* <p> |
||||
* This class is slightly incompatible if the given salt contains characters that are not part of the allowed range |
||||
* [a-zA-Z0-9./]. |
||||
* <p> |
||||
* This class is immutable and thread-safe. |
||||
* |
||||
* @version $Id: UnixCrypt.java 1429868 2013-01-07 16:08:05Z ggregory $ |
||||
* @since 1.7 |
||||
*/ |
||||
public class UnixCrypt { |
||||
|
||||
private static final int CON_SALT[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 5, 6, |
||||
7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, |
||||
34, 35, 36, 37, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, |
||||
54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 0, 0, 0, 0, 0 }; |
||||
|
||||
private static final int COV2CHAR[] = { 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 65, 66, 67, 68, 69, 70, |
||||
71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102, |
||||
103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122 }; |
||||
|
||||
private static final char SALT_CHARS[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./" |
||||
.toCharArray(); |
||||
|
||||
private static final boolean SHIFT2[] = { false, false, true, true, true, true, true, true, false, true, true, |
||||
true, true, true, true, false }; |
||||
|
||||
private static final int SKB[][] = { |
||||
{ 0, 16, 0x20000000, 0x20000010, 0x10000, 0x10010, 0x20010000, 0x20010010, 2048, 2064, 0x20000800, |
||||
0x20000810, 0x10800, 0x10810, 0x20010800, 0x20010810, 32, 48, 0x20000020, 0x20000030, 0x10020, |
||||
0x10030, 0x20010020, 0x20010030, 2080, 2096, 0x20000820, 0x20000830, 0x10820, 0x10830, 0x20010820, |
||||
0x20010830, 0x80000, 0x80010, 0x20080000, 0x20080010, 0x90000, 0x90010, 0x20090000, 0x20090010, |
||||
0x80800, 0x80810, 0x20080800, 0x20080810, 0x90800, 0x90810, 0x20090800, 0x20090810, 0x80020, |
||||
0x80030, 0x20080020, 0x20080030, 0x90020, 0x90030, 0x20090020, 0x20090030, 0x80820, 0x80830, |
||||
0x20080820, 0x20080830, 0x90820, 0x90830, 0x20090820, 0x20090830 }, |
||||
{ 0, 0x2000000, 8192, 0x2002000, 0x200000, 0x2200000, 0x202000, 0x2202000, 4, 0x2000004, 8196, 0x2002004, |
||||
0x200004, 0x2200004, 0x202004, 0x2202004, 1024, 0x2000400, 9216, 0x2002400, 0x200400, 0x2200400, |
||||
0x202400, 0x2202400, 1028, 0x2000404, 9220, 0x2002404, 0x200404, 0x2200404, 0x202404, 0x2202404, |
||||
0x10000000, 0x12000000, 0x10002000, 0x12002000, 0x10200000, 0x12200000, 0x10202000, 0x12202000, |
||||
0x10000004, 0x12000004, 0x10002004, 0x12002004, 0x10200004, 0x12200004, 0x10202004, 0x12202004, |
||||
0x10000400, 0x12000400, 0x10002400, 0x12002400, 0x10200400, 0x12200400, 0x10202400, 0x12202400, |
||||
0x10000404, 0x12000404, 0x10002404, 0x12002404, 0x10200404, 0x12200404, 0x10202404, 0x12202404 }, |
||||
{ 0, 1, 0x40000, 0x40001, 0x1000000, 0x1000001, 0x1040000, 0x1040001, 2, 3, 0x40002, 0x40003, 0x1000002, |
||||
0x1000003, 0x1040002, 0x1040003, 512, 513, 0x40200, 0x40201, 0x1000200, 0x1000201, 0x1040200, |
||||
0x1040201, 514, 515, 0x40202, 0x40203, 0x1000202, 0x1000203, 0x1040202, 0x1040203, 0x8000000, |
||||
0x8000001, 0x8040000, 0x8040001, 0x9000000, 0x9000001, 0x9040000, 0x9040001, 0x8000002, 0x8000003, |
||||
0x8040002, 0x8040003, 0x9000002, 0x9000003, 0x9040002, 0x9040003, 0x8000200, 0x8000201, 0x8040200, |
||||
0x8040201, 0x9000200, 0x9000201, 0x9040200, 0x9040201, 0x8000202, 0x8000203, 0x8040202, 0x8040203, |
||||
0x9000202, 0x9000203, 0x9040202, 0x9040203 }, |
||||
{ 0, 0x100000, 256, 0x100100, 8, 0x100008, 264, 0x100108, 4096, 0x101000, 4352, 0x101100, 4104, 0x101008, |
||||
4360, 0x101108, 0x4000000, 0x4100000, 0x4000100, 0x4100100, 0x4000008, 0x4100008, 0x4000108, |
||||
0x4100108, 0x4001000, 0x4101000, 0x4001100, 0x4101100, 0x4001008, 0x4101008, 0x4001108, 0x4101108, |
||||
0x20000, 0x120000, 0x20100, 0x120100, 0x20008, 0x120008, 0x20108, 0x120108, 0x21000, 0x121000, |
||||
0x21100, 0x121100, 0x21008, 0x121008, 0x21108, 0x121108, 0x4020000, 0x4120000, 0x4020100, |
||||
0x4120100, 0x4020008, 0x4120008, 0x4020108, 0x4120108, 0x4021000, 0x4121000, 0x4021100, 0x4121100, |
||||
0x4021008, 0x4121008, 0x4021108, 0x4121108 }, |
||||
{ 0, 0x10000000, 0x10000, 0x10010000, 4, 0x10000004, 0x10004, 0x10010004, 0x20000000, 0x30000000, |
||||
0x20010000, 0x30010000, 0x20000004, 0x30000004, 0x20010004, 0x30010004, 0x100000, 0x10100000, |
||||
0x110000, 0x10110000, 0x100004, 0x10100004, 0x110004, 0x10110004, 0x20100000, 0x30100000, |
||||
0x20110000, 0x30110000, 0x20100004, 0x30100004, 0x20110004, 0x30110004, 4096, 0x10001000, 0x11000, |
||||
0x10011000, 4100, 0x10001004, 0x11004, 0x10011004, 0x20001000, 0x30001000, 0x20011000, 0x30011000, |
||||
0x20001004, 0x30001004, 0x20011004, 0x30011004, 0x101000, 0x10101000, 0x111000, 0x10111000, |
||||
0x101004, 0x10101004, 0x111004, 0x10111004, 0x20101000, 0x30101000, 0x20111000, 0x30111000, |
||||
0x20101004, 0x30101004, 0x20111004, 0x30111004 }, |
||||
{ 0, 0x8000000, 8, 0x8000008, 1024, 0x8000400, 1032, 0x8000408, 0x20000, 0x8020000, 0x20008, 0x8020008, |
||||
0x20400, 0x8020400, 0x20408, 0x8020408, 1, 0x8000001, 9, 0x8000009, 1025, 0x8000401, 1033, |
||||
0x8000409, 0x20001, 0x8020001, 0x20009, 0x8020009, 0x20401, 0x8020401, 0x20409, 0x8020409, |
||||
0x2000000, 0xa000000, 0x2000008, 0xa000008, 0x2000400, 0xa000400, 0x2000408, 0xa000408, 0x2020000, |
||||
0xa020000, 0x2020008, 0xa020008, 0x2020400, 0xa020400, 0x2020408, 0xa020408, 0x2000001, 0xa000001, |
||||
0x2000009, 0xa000009, 0x2000401, 0xa000401, 0x2000409, 0xa000409, 0x2020001, 0xa020001, 0x2020009, |
||||
0xa020009, 0x2020401, 0xa020401, 0x2020409, 0xa020409 }, |
||||
{ 0, 256, 0x80000, 0x80100, 0x1000000, 0x1000100, 0x1080000, 0x1080100, 16, 272, 0x80010, 0x80110, |
||||
0x1000010, 0x1000110, 0x1080010, 0x1080110, 0x200000, 0x200100, 0x280000, 0x280100, 0x1200000, |
||||
0x1200100, 0x1280000, 0x1280100, 0x200010, 0x200110, 0x280010, 0x280110, 0x1200010, 0x1200110, |
||||
0x1280010, 0x1280110, 512, 768, 0x80200, 0x80300, 0x1000200, 0x1000300, 0x1080200, 0x1080300, 528, |
||||
784, 0x80210, 0x80310, 0x1000210, 0x1000310, 0x1080210, 0x1080310, 0x200200, 0x200300, 0x280200, |
||||
0x280300, 0x1200200, 0x1200300, 0x1280200, 0x1280300, 0x200210, 0x200310, 0x280210, 0x280310, |
||||
0x1200210, 0x1200310, 0x1280210, 0x1280310 }, |
||||
{ 0, 0x4000000, 0x40000, 0x4040000, 2, 0x4000002, 0x40002, 0x4040002, 8192, 0x4002000, 0x42000, 0x4042000, |
||||
8194, 0x4002002, 0x42002, 0x4042002, 32, 0x4000020, 0x40020, 0x4040020, 34, 0x4000022, 0x40022, |
||||
0x4040022, 8224, 0x4002020, 0x42020, 0x4042020, 8226, 0x4002022, 0x42022, 0x4042022, 2048, |
||||
0x4000800, 0x40800, 0x4040800, 2050, 0x4000802, 0x40802, 0x4040802, 10240, 0x4002800, 0x42800, |
||||
0x4042800, 10242, 0x4002802, 0x42802, 0x4042802, 2080, 0x4000820, 0x40820, 0x4040820, 2082, |
||||
0x4000822, 0x40822, 0x4040822, 10272, 0x4002820, 0x42820, 0x4042820, 10274, 0x4002822, 0x42822, |
||||
0x4042822 } }; |
||||
|
||||
private static final int SPTRANS[][] = { |
||||
{ 0x820200, 0x20000, 0x80800000, 0x80820200, 0x800000, 0x80020200, 0x80020000, 0x80800000, 0x80020200, |
||||
0x820200, 0x820000, 0x80000200, 0x80800200, 0x800000, 0, 0x80020000, 0x20000, 0x80000000, |
||||
0x800200, 0x20200, 0x80820200, 0x820000, 0x80000200, 0x800200, 0x80000000, 512, 0x20200, |
||||
0x80820000, 512, 0x80800200, 0x80820000, 0, 0, 0x80820200, 0x800200, 0x80020000, 0x820200, |
||||
0x20000, 0x80000200, 0x800200, 0x80820000, 512, 0x20200, 0x80800000, 0x80020200, 0x80000000, |
||||
0x80800000, 0x820000, 0x80820200, 0x20200, 0x820000, 0x80800200, 0x800000, 0x80000200, 0x80020000, |
||||
0, 0x20000, 0x800000, 0x80800200, 0x820200, 0x80000000, 0x80820000, 512, 0x80020200 }, |
||||
{ 0x10042004, 0, 0x42000, 0x10040000, 0x10000004, 8196, 0x10002000, 0x42000, 8192, 0x10040004, 4, |
||||
0x10002000, 0x40004, 0x10042000, 0x10040000, 4, 0x40000, 0x10002004, 0x10040004, 8192, 0x42004, |
||||
0x10000000, 0, 0x40004, 0x10002004, 0x42004, 0x10042000, 0x10000004, 0x10000000, 0x40000, 8196, |
||||
0x10042004, 0x40004, 0x10042000, 0x10002000, 0x42004, 0x10042004, 0x40004, 0x10000004, 0, |
||||
0x10000000, 8196, 0x40000, 0x10040004, 8192, 0x10000000, 0x42004, 0x10002004, 0x10042000, 8192, 0, |
||||
0x10000004, 4, 0x10042004, 0x42000, 0x10040000, 0x10040004, 0x40000, 8196, 0x10002000, 0x10002004, |
||||
4, 0x10040000, 0x42000 }, |
||||
{ 0x41000000, 0x1010040, 64, 0x41000040, 0x40010000, 0x1000000, 0x41000040, 0x10040, 0x1000040, 0x10000, |
||||
0x1010000, 0x40000000, 0x41010040, 0x40000040, 0x40000000, 0x41010000, 0, 0x40010000, 0x1010040, |
||||
64, 0x40000040, 0x41010040, 0x10000, 0x41000000, 0x41010000, 0x1000040, 0x40010040, 0x1010000, |
||||
0x10040, 0, 0x1000000, 0x40010040, 0x1010040, 64, 0x40000000, 0x10000, 0x40000040, 0x40010000, |
||||
0x1010000, 0x41000040, 0, 0x1010040, 0x10040, 0x41010000, 0x40010000, 0x1000000, 0x41010040, |
||||
0x40000000, 0x40010040, 0x41000000, 0x1000000, 0x41010040, 0x10000, 0x1000040, 0x41000040, |
||||
0x10040, 0x1000040, 0, 0x41010000, 0x40000040, 0x41000000, 0x40010040, 64, 0x1010000 }, |
||||
{ 0x100402, 0x4000400, 2, 0x4100402, 0, 0x4100000, 0x4000402, 0x100002, 0x4100400, 0x4000002, 0x4000000, |
||||
1026, 0x4000002, 0x100402, 0x100000, 0x4000000, 0x4100002, 0x100400, 1024, 2, 0x100400, 0x4000402, |
||||
0x4100000, 1024, 1026, 0, 0x100002, 0x4100400, 0x4000400, 0x4100002, 0x4100402, 0x100000, |
||||
0x4100002, 1026, 0x100000, 0x4000002, 0x100400, 0x4000400, 2, 0x4100000, 0x4000402, 0, 1024, |
||||
0x100002, 0, 0x4100002, 0x4100400, 1024, 0x4000000, 0x4100402, 0x100402, 0x100000, 0x4100402, 2, |
||||
0x4000400, 0x100402, 0x100002, 0x100400, 0x4100000, 0x4000402, 1026, 0x4000000, 0x4000002, |
||||
0x4100400 }, |
||||
{ 0x2000000, 16384, 256, 0x2004108, 0x2004008, 0x2000100, 16648, 0x2004000, 16384, 8, 0x2000008, 16640, |
||||
0x2000108, 0x2004008, 0x2004100, 0, 16640, 0x2000000, 16392, 264, 0x2000100, 16648, 0, 0x2000008, |
||||
8, 0x2000108, 0x2004108, 16392, 0x2004000, 256, 264, 0x2004100, 0x2004100, 0x2000108, 16392, |
||||
0x2004000, 16384, 8, 0x2000008, 0x2000100, 0x2000000, 16640, 0x2004108, 0, 16648, 0x2000000, 256, |
||||
16392, 0x2000108, 256, 0, 0x2004108, 0x2004008, 0x2004100, 264, 16384, 16640, 0x2004008, |
||||
0x2000100, 264, 8, 16648, 0x2004000, 0x2000008 }, |
||||
{ 0x20000010, 0x80010, 0, 0x20080800, 0x80010, 2048, 0x20000810, 0x80000, 2064, 0x20080810, 0x80800, |
||||
0x20000000, 0x20000800, 0x20000010, 0x20080000, 0x80810, 0x80000, 0x20000810, 0x20080010, 0, 2048, |
||||
16, 0x20080800, 0x20080010, 0x20080810, 0x20080000, 0x20000000, 2064, 16, 0x80800, 0x80810, |
||||
0x20000800, 2064, 0x20000000, 0x20000800, 0x80810, 0x20080800, 0x80010, 0, 0x20000800, 0x20000000, |
||||
2048, 0x20080010, 0x80000, 0x80010, 0x20080810, 0x80800, 16, 0x20080810, 0x80800, 0x80000, |
||||
0x20000810, 0x20000010, 0x20080000, 0x80810, 0, 2048, 0x20000010, 0x20000810, 0x20080800, |
||||
0x20080000, 2064, 16, 0x20080010 }, |
||||
{ 4096, 128, 0x400080, 0x400001, 0x401081, 4097, 4224, 0, 0x400000, 0x400081, 129, 0x401000, 1, 0x401080, |
||||
0x401000, 129, 0x400081, 4096, 4097, 0x401081, 0, 0x400080, 0x400001, 4224, 0x401001, 4225, |
||||
0x401080, 1, 4225, 0x401001, 128, 0x400000, 4225, 0x401000, 0x401001, 129, 4096, 128, 0x400000, |
||||
0x401001, 0x400081, 4225, 4224, 0, 128, 0x400001, 1, 0x400080, 0, 0x400081, 0x400080, 4224, 129, |
||||
4096, 0x401081, 0x400000, 0x401080, 1, 4097, 0x401081, 0x400001, 0x401080, 0x401000, 4097 }, |
||||
{ 0x8200020, 0x8208000, 32800, 0, 0x8008000, 0x200020, 0x8200000, 0x8208020, 32, 0x8000000, 0x208000, |
||||
32800, 0x208020, 0x8008020, 0x8000020, 0x8200000, 32768, 0x208020, 0x200020, 0x8008000, 0x8208020, |
||||
0x8000020, 0, 0x208000, 0x8000000, 0x200000, 0x8008020, 0x8200020, 0x200000, 32768, 0x8208000, 32, |
||||
0x200000, 32768, 0x8000020, 0x8208020, 32800, 0x8000000, 0, 0x208000, 0x8200020, 0x8008020, |
||||
0x8008000, 0x200020, 0x8208000, 32, 0x200020, 0x8008000, 0x8208020, 0x200000, 0x8200000, |
||||
0x8000020, 0x208000, 32800, 0x8008020, 0x8200000, 32, 0x8208000, 0x208020, 0, 0x8000000, |
||||
0x8200020, 32768, 0x208020 } }; |
||||
|
||||
/** |
||||
* Generates a crypt(3) compatible hash using the DES algorithm. |
||||
* <p> |
||||
* As no salt is given, a random one will be used. |
||||
* |
||||
* @param original |
||||
* plaintext password |
||||
* @return a 13 character string starting with the salt string |
||||
*/ |
||||
public static String crypt(final byte[] original) { |
||||
return crypt(original, null); |
||||
} |
||||
|
||||
/** |
||||
* Generates a crypt(3) compatible hash using the DES algorithm. |
||||
* <p> |
||||
* Using unspecified characters as salt results incompatible hash values. |
||||
* |
||||
* @param original |
||||
* plaintext password |
||||
* @param salt |
||||
* a two character string drawn from [a-zA-Z0-9./] or null for a random one |
||||
* @return a 13 character string starting with the salt string |
||||
* @throws IllegalArgumentException |
||||
* if the salt does not match the allowed pattern |
||||
*/ |
||||
public static String crypt(final byte[] original, String salt) { |
||||
if (salt == null) { |
||||
final Random randomGenerator = new Random(); |
||||
final int numSaltChars = SALT_CHARS.length; |
||||
salt = "" + SALT_CHARS[randomGenerator.nextInt(numSaltChars)] + |
||||
SALT_CHARS[randomGenerator.nextInt(numSaltChars)]; |
||||
} else if (!salt.matches("^[" + B64.B64T + "]{2,}$")) { |
||||
throw new IllegalArgumentException("Invalid salt value: " + salt); |
||||
} |
||||
|
||||
final StringBuilder buffer = new StringBuilder(" "); |
||||
final char charZero = salt.charAt(0); |
||||
final char charOne = salt.charAt(1); |
||||
buffer.setCharAt(0, charZero); |
||||
buffer.setCharAt(1, charOne); |
||||
final int eSwap0 = CON_SALT[charZero]; |
||||
final int eSwap1 = CON_SALT[charOne] << 4; |
||||
final byte key[] = new byte[8]; |
||||
for (int i = 0; i < key.length; i++) { |
||||
key[i] = 0; |
||||
} |
||||
|
||||
for (int i = 0; i < key.length && i < original.length; i++) { |
||||
final int iChar = original[i]; |
||||
key[i] = (byte) (iChar << 1); |
||||
} |
||||
|
||||
final int schedule[] = desSetKey(key); |
||||
final int out[] = body(schedule, eSwap0, eSwap1); |
||||
final byte b[] = new byte[9]; |
||||
intToFourBytes(out[0], b, 0); |
||||
intToFourBytes(out[1], b, 4); |
||||
b[8] = 0; |
||||
int i = 2; |
||||
int y = 0; |
||||
int u = 128; |
||||
for (; i < 13; i++) { |
||||
int j = 0; |
||||
int c = 0; |
||||
for (; j < 6; j++) { |
||||
c <<= 1; |
||||
if ((b[y] & u) != 0) { |
||||
c |= 0x1; |
||||
} |
||||
u >>>= 1; |
||||
if (u == 0) { |
||||
y++; |
||||
u = 128; |
||||
} |
||||
buffer.setCharAt(i, (char) COV2CHAR[c]); |
||||
} |
||||
} |
||||
return buffer.toString(); |
||||
} |
||||
|
||||
/** |
||||
* Generates a crypt(3) compatible hash using the DES algorithm. |
||||
* <p> |
||||
* As no salt is given, a random one is used. |
||||
* |
||||
* @param original |
||||
* plaintext password |
||||
* @return a 13 character string starting with the salt string |
||||
*/ |
||||
public static String crypt(final String original) { |
||||
return crypt(original.getBytes(Charsets.UTF_8)); |
||||
} |
||||
|
||||
/** |
||||
* Generates a crypt(3) compatible hash using the DES algorithm. |
||||
* |
||||
* @param original |
||||
* plaintext password |
||||
* @param salt |
||||
* a two character string drawn from [a-zA-Z0-9./] or null for a random one |
||||
* @return a 13 character string starting with the salt string |
||||
* @throws IllegalArgumentException |
||||
* if the salt does not match the allowed pattern |
||||
*/ |
||||
public static String crypt(final String original, final String salt) { |
||||
return crypt(original.getBytes(Charsets.UTF_8), salt); |
||||
} |
||||
|
||||
private static int[] body(final int schedule[], final int eSwap0, final int eSwap1) { |
||||
int left = 0; |
||||
int right = 0; |
||||
int t = 0; |
||||
for (int j = 0; j < 25; j++) { |
||||
for (int i = 0; i < 32; i += 4) { |
||||
left = dEncrypt(left, right, i, eSwap0, eSwap1, schedule); |
||||
right = dEncrypt(right, left, i + 2, eSwap0, eSwap1, schedule); |
||||
} |
||||
t = left; |
||||
left = right; |
||||
right = t; |
||||
} |
||||
|
||||
t = right; |
||||
right = left >>> 1 | left << 31; |
||||
left = t >>> 1 | t << 31; |
||||
final int results[] = new int[2]; |
||||
permOp(right, left, 1, 0x55555555, results); |
||||
right = results[0]; |
||||
left = results[1]; |
||||
permOp(left, right, 8, 0xff00ff, results); |
||||
left = results[0]; |
||||
right = results[1]; |
||||
permOp(right, left, 2, 0x33333333, results); |
||||
right = results[0]; |
||||
left = results[1]; |
||||
permOp(left, right, 16, 65535, results); |
||||
left = results[0]; |
||||
right = results[1]; |
||||
permOp(right, left, 4, 0xf0f0f0f, results); |
||||
right = results[0]; |
||||
left = results[1]; |
||||
final int out[] = new int[2]; |
||||
out[0] = left; |
||||
out[1] = right; |
||||
return out; |
||||
} |
||||
|
||||
private static int byteToUnsigned(final byte b) { |
||||
final int value = b; |
||||
return value < 0 ? value + 256 : value; |
||||
} |
||||
|
||||
private static int dEncrypt(int el, final int r, final int s, final int e0, final int e1, final int sArr[]) { |
||||
int v = r ^ r >>> 16; |
||||
int u = v & e0; |
||||
v &= e1; |
||||
u = u ^ u << 16 ^ r ^ sArr[s]; |
||||
int t = v ^ v << 16 ^ r ^ sArr[s + 1]; |
||||
t = t >>> 4 | t << 28; |
||||
el ^= SPTRANS[1][t & 0x3f] | SPTRANS[3][t >>> 8 & 0x3f] | SPTRANS[5][t >>> 16 & 0x3f] | |
||||
SPTRANS[7][t >>> 24 & 0x3f] | SPTRANS[0][u & 0x3f] | SPTRANS[2][u >>> 8 & 0x3f] | |
||||
SPTRANS[4][u >>> 16 & 0x3f] | SPTRANS[6][u >>> 24 & 0x3f]; |
||||
return el; |
||||
} |
||||
|
||||
private static int[] desSetKey(final byte key[]) { |
||||
final int schedule[] = new int[32]; |
||||
int c = fourBytesToInt(key, 0); |
||||
int d = fourBytesToInt(key, 4); |
||||
final int results[] = new int[2]; |
||||
permOp(d, c, 4, 0xf0f0f0f, results); |
||||
d = results[0]; |
||||
c = results[1]; |
||||
c = hPermOp(c, -2, 0xcccc0000); |
||||
d = hPermOp(d, -2, 0xcccc0000); |
||||
permOp(d, c, 1, 0x55555555, results); |
||||
d = results[0]; |
||||
c = results[1]; |
||||
permOp(c, d, 8, 0xff00ff, results); |
||||
c = results[0]; |
||||
d = results[1]; |
||||
permOp(d, c, 1, 0x55555555, results); |
||||
d = results[0]; |
||||
c = results[1]; |
||||
d = (d & 0xff) << 16 | d & 0xff00 | (d & 0xff0000) >>> 16 | (c & 0xf0000000) >>> 4; |
||||
c &= 0xfffffff; |
||||
int j = 0; |
||||
for (int i = 0; i < 16; i++) { |
||||
if (SHIFT2[i]) { |
||||
c = c >>> 2 | c << 26; |
||||
d = d >>> 2 | d << 26; |
||||
} else { |
||||
c = c >>> 1 | c << 27; |
||||
d = d >>> 1 | d << 27; |
||||
} |
||||
c &= 0xfffffff; |
||||
d &= 0xfffffff; |
||||
int s = SKB[0][c & 0x3f] | SKB[1][c >>> 6 & 0x3 | c >>> 7 & 0x3c] | |
||||
SKB[2][c >>> 13 & 0xf | c >>> 14 & 0x30] | |
||||
SKB[3][c >>> 20 & 0x1 | c >>> 21 & 0x6 | c >>> 22 & 0x38]; |
||||
final int t = SKB[4][d & 0x3f] | SKB[5][d >>> 7 & 0x3 | d >>> 8 & 0x3c] | SKB[6][d >>> 15 & 0x3f] | |
||||
SKB[7][d >>> 21 & 0xf | d >>> 22 & 0x30]; |
||||
schedule[j++] = (t << 16 | s & 0xffff); |
||||
s = s >>> 16 | t & 0xffff0000; |
||||
s = s << 4 | s >>> 28; |
||||
schedule[j++] = s; |
||||
} |
||||
|
||||
return schedule; |
||||
} |
||||
|
||||
private static int fourBytesToInt(final byte b[], int offset) { |
||||
int value = byteToUnsigned(b[offset++]); |
||||
value |= byteToUnsigned(b[offset++]) << 8; |
||||
value |= byteToUnsigned(b[offset++]) << 16; |
||||
value |= byteToUnsigned(b[offset++]) << 24; |
||||
return value; |
||||
} |
||||
|
||||
private static int hPermOp(int a, final int n, final int m) { |
||||
final int t = (a << 16 - n ^ a) & m; |
||||
a = a ^ t ^ t >>> 16 - n; |
||||
return a; |
||||
} |
||||
|
||||
private static void intToFourBytes(final int iValue, final byte b[], int offset) { |
||||
b[offset++] = (byte) (iValue & 0xff); |
||||
b[offset++] = (byte) (iValue >>> 8 & 0xff); |
||||
b[offset++] = (byte) (iValue >>> 16 & 0xff); |
||||
b[offset++] = (byte) (iValue >>> 24 & 0xff); |
||||
} |
||||
|
||||
private static void permOp(int a, int b, final int n, final int m, final int results[]) { |
||||
final int t = (a >>> n ^ b) & m; |
||||
a ^= t << n; |
||||
b ^= t; |
||||
results[0] = a; |
||||
results[1] = b; |
||||
} |
||||
|
||||
} |
@ -0,0 +1,198 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one |
||||
* or more contributor license agreements. See the NOTICE file |
||||
* distributed with this work for additional information |
||||
* regarding copyright ownership. The ASF licenses this file |
||||
* to you under the Apache License, Version 2.0 (the |
||||
* "License"); you may not use this file except in compliance |
||||
* with the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, |
||||
* software distributed under the License is distributed on an |
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
||||
* KIND, either express or implied. See the License for the |
||||
* specific language governing permissions and limitations |
||||
* under the License. |
||||
*/ |
||||
package com.fr.third.org.apache.commons.codec.digest; |
||||
|
||||
import static java.lang.Integer.rotateLeft; |
||||
|
||||
import java.util.zip.Checksum; |
||||
|
||||
/**
 * Implementation of the xxhash32 hash algorithm.
 *
 * <p>Copied from Commons Compress 1.14
 * <a href="https://git-wip-us.apache.org/repos/asf?p=commons-compress.git;a=blob;f=src/main/java/org/apache/commons/compress/compressors/lz4/XXHash32.java;h=a406ffc197449be594d46f0d2712b2d4786a1e68;hb=HEAD">https://git-wip-us.apache.org/repos/asf?p=commons-compress.git;a=blob;f=src/main/java/org/apache/commons/compress/compressors/lz4/XXHash32.java;h=a406ffc197449be594d46f0d2712b2d4786a1e68;hb=HEAD</a></p>
 * <p>NotThreadSafe</p>
 * @see <a href="http://cyan4973.github.io/xxHash/">xxHash</a>
 * @since 1.11
 */
public class XXHash32 implements Checksum {

    private static final int BUF_SIZE = 16;
    private static final int ROTATE_BITS = 13;

    private static final int PRIME1 = (int) 2654435761L;
    private static final int PRIME2 = (int) 2246822519L;
    private static final int PRIME3 = (int) 3266489917L;
    private static final int PRIME4 = 668265263;
    private static final int PRIME5 = 374761393;

    private final byte[] oneByte = new byte[1];
    private final int[] state = new int[4];
    // Note: the code used to use ByteBuffer but the manual method is 50% faster
    // See: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/2f56fb5c
    private final byte[] buffer = new byte[BUF_SIZE];
    private final int seed;

    /** Number of bytes seen since the last reset; allowed to wrap, only its low 32 bits enter the hash. */
    private int totalLen;
    /** Number of unprocessed bytes currently held in {@link #buffer}. */
    private int pos;
    /** Set to true once at least one full 16-byte block has been mixed into {@link #state}. */
    private boolean stateUpdated;

    /**
     * Creates an XXHash32 instance with a seed of 0.
     */
    public XXHash32() {
        this(0);
    }

    /**
     * Creates an XXHash32 instance.
     * @param seed the seed to use
     */
    public XXHash32(final int seed) {
        this.seed = seed;
        initializeState();
    }

    /**
     * Restores the instance to the state it had directly after construction, keeping the seed.
     */
    @Override
    public void reset() {
        initializeState();
        totalLen = 0;
        pos = 0;
        stateUpdated = false;
    }

    /**
     * Adds a single byte to the hash.
     * @param b the byte to add; only the low eight bits are used
     */
    @Override
    public void update(final int b) {
        oneByte[0] = (byte) (b & 0xff);
        update(oneByte, 0, 1);
    }

    /**
     * Adds a range of bytes to the hash. Calls with {@code len <= 0} are ignored.
     * @param b the array holding the data
     * @param off index of the first byte to add
     * @param len number of bytes to add
     */
    @Override
    public void update(final byte[] b, int off, final int len) {
        if (len <= 0) {
            return;
        }
        totalLen += len;

        final int end = off + len;

        // Not enough data to complete a 16-byte block: just stash it.
        // Written as a subtraction so that a very large len cannot overflow into a false "fits".
        if (pos + len - BUF_SIZE < 0) {
            System.arraycopy(b, off, buffer, pos, len);
            pos += len;
            return;
        }

        // Complete the partially filled buffer first and process it.
        if (pos > 0) {
            final int size = BUF_SIZE - pos;
            System.arraycopy(b, off, buffer, pos, size);
            process(buffer, 0);
            off += size;
        }

        // Process all full blocks directly from the caller's array.
        final int limit = end - BUF_SIZE;
        while (off <= limit) {
            process(b, off);
            off += BUF_SIZE;
        }

        // Keep the tail for the next update or for getValue(); process() has already reset pos to 0.
        if (off < end) {
            pos = end - off;
            System.arraycopy(b, off, buffer, 0, pos);
        }
    }

    /**
     * Returns the hash of all bytes added since construction or the last reset. The instance is not modified,
     * so more data may be added afterwards.
     * @return the 32-bit hash as an unsigned value in a long
     */
    @Override
    public long getValue() {
        int hash;
        // Decide on whether a block was processed, not on totalLen: an input of exactly 16 bytes has
        // already been mixed into the state, and totalLen may have wrapped for very long inputs.
        if (stateUpdated) {
            hash =
                rotateLeft(state[0],  1) +
                rotateLeft(state[1],  7) +
                rotateLeft(state[2], 12) +
                rotateLeft(state[3], 18);
        } else {
            // No block processed yet, so state[2] still holds the seed.
            hash = state[2] + PRIME5;
        }
        hash += totalLen;

        // Consume the buffered tail: whole ints first, then single bytes.
        int idx = 0;
        final int limit = pos - 4;
        for (; idx <= limit; idx += 4) {
            hash = rotateLeft(hash + getInt(buffer, idx) * PRIME3, 17) * PRIME4;
        }
        while (idx < pos) {
            hash = rotateLeft(hash + (buffer[idx++] & 0xff) * PRIME5, 11) * PRIME1;
        }

        // Final avalanche.
        hash ^= hash >>> 15;
        hash *= PRIME2;
        hash ^= hash >>> 13;
        hash *= PRIME3;
        hash ^= hash >>> 16;
        return hash & 0xffffffffL;
    }

    /**
     * Reads four bytes as a little endian int.
     * @param buffer the array to read from
     * @param idx index of the least significant byte
     * @return the int read
     */
    private static int getInt(final byte[] buffer, final int idx) {
        return (buffer[idx] & 0xff)
            | (buffer[idx + 1] & 0xff) << 8
            | (buffer[idx + 2] & 0xff) << 16
            | (buffer[idx + 3] & 0xff) << 24;
    }

    /**
     * Loads the four accumulators with their seed-dependent start values.
     */
    private void initializeState() {
        state[0] = seed + PRIME1 + PRIME2;
        state[1] = seed + PRIME2;
        state[2] = seed;
        state[3] = seed - PRIME1;
    }

    /**
     * Mixes one 16-byte block into the four accumulators and marks the buffer as empty.
     * @param b the array holding the block
     * @param offset index of the first byte of the block
     */
    private void process(final byte[] b, final int offset) {
        // local shadows for performance
        int s0 = state[0];
        int s1 = state[1];
        int s2 = state[2];
        int s3 = state[3];

        s0 = rotateLeft(s0 + getInt(b, offset) * PRIME2, ROTATE_BITS) * PRIME1;
        s1 = rotateLeft(s1 + getInt(b, offset + 4) * PRIME2, ROTATE_BITS) * PRIME1;
        s2 = rotateLeft(s2 + getInt(b, offset + 8) * PRIME2, ROTATE_BITS) * PRIME1;
        s3 = rotateLeft(s3 + getInt(b, offset + 12) * PRIME2, ROTATE_BITS) * PRIME1;

        state[0] = s0;
        state[1] = s1;
        state[2] = s2;
        state[3] = s3;

        stateUpdated = true;
        pos = 0;
    }
}
@ -0,0 +1,24 @@
|
||||
<!-- |
||||
Licensed to the Apache Software Foundation (ASF) under one or more |
||||
contributor license agreements. See the NOTICE file distributed with |
||||
this work for additional information regarding copyright ownership. |
||||
The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
(the "License"); you may not use this file except in compliance with |
||||
the License. You may obtain a copy of the License at |
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
||||
Unless required by applicable law or agreed to in writing, software |
||||
distributed under the License is distributed on an "AS IS" BASIS, |
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
See the License for the specific language governing permissions and |
||||
limitations under the License. |
||||
--> |
||||
<html> |
||||
<body> |
||||
Simplifies common {@link java.security.MessageDigest} tasks and |
||||
includes a libc crypt(3) compatible crypt method that supports DES, |
||||
MD5, SHA-256 and SHA-512 based algorithms as well as the Apache |
||||
specific "$apr1$" variant. |
||||
</body> |
||||
</html> |
@ -0,0 +1,80 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.language; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.EncoderException; |
||||
import com.fr.third.org.apache.commons.codec.StringEncoder; |
||||
|
||||
/** |
||||
* Encodes a string into a Caverphone value. |
||||
* |
||||
* This is an algorithm created by the Caversham Project at the University of Otago. It implements the Caverphone 2.0 |
||||
* algorithm: |
||||
* |
||||
* <p>This class is immutable and thread-safe.</p> |
||||
* |
||||
* @version $Id: Caverphone.java 1075947 2011-03-01 17:56:14Z ggregory $ |
||||
* @see <a href="http://en.wikipedia.org/wiki/Caverphone">Wikipedia - Caverphone</a> |
||||
* @since 1.5 |
||||
*/ |
||||
public abstract class AbstractCaverphone implements StringEncoder { |
||||
|
||||
/** |
||||
* Creates an instance of the Caverphone encoder |
||||
*/ |
||||
public AbstractCaverphone() { |
||||
super(); |
||||
} |
||||
|
||||
/** |
||||
* Encodes an Object using the caverphone algorithm. This method is provided in order to satisfy the requirements of |
||||
* the Encoder interface, and will throw an EncoderException if the supplied object is not of type java.lang.String. |
||||
* |
||||
* @param source |
||||
* Object to encode |
||||
* @return An object (or type java.lang.String) containing the caverphone code which corresponds to the String |
||||
* supplied. |
||||
* @throws EncoderException |
||||
* if the parameter supplied is not of type java.lang.String |
||||
*/ |
||||
@Override |
||||
public Object encode(final Object source) throws EncoderException { |
||||
if (!(source instanceof String)) { |
||||
throw new EncoderException("Parameter supplied to Caverphone encode is not of type java.lang.String"); |
||||
} |
||||
return this.encode((String) source); |
||||
} |
||||
|
||||
/** |
||||
* Tests if the encodings of two strings are equal. |
||||
* |
||||
* This method might be promoted to a new AbstractStringEncoder superclass. |
||||
* |
||||
* @param str1 |
||||
* First of two strings to compare |
||||
* @param str2 |
||||
* Second of two strings to compare |
||||
* @return <code>true</code> if the encodings of these strings are identical, <code>false</code> otherwise. |
||||
* @throws EncoderException |
||||
* thrown if there is an error condition during the encoding process. |
||||
*/ |
||||
public boolean isEncodeEqual(final String str1, final String str2) throws EncoderException { |
||||
return this.encode(str1).equals(this.encode(str2)); |
||||
} |
||||
|
||||
} |
@ -0,0 +1,105 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.language; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.EncoderException; |
||||
import com.fr.third.org.apache.commons.codec.StringEncoder; |
||||
|
||||
/** |
||||
* Encodes a string into a Caverphone 2.0 value. Delegate to a {@link Caverphone2} instance. |
||||
* |
||||
* This is an algorithm created by the Caversham Project at the University of Otago. It implements the Caverphone 2.0 |
||||
* algorithm: |
||||
* |
||||
* @version $Id: Caverphone.java 1079535 2011-03-08 20:54:37Z ggregory $ |
||||
* @see <a href="http://en.wikipedia.org/wiki/Caverphone">Wikipedia - Caverphone</a> |
||||
* @see <a href="http://caversham.otago.ac.nz/files/working/ctp150804.pdf">Caverphone 2.0 specification</a> |
||||
* @since 1.4 |
||||
* @deprecated 1.5 Replaced by {@link Caverphone2}, will be removed in 2.0. |
||||
*/ |
||||
@Deprecated |
||||
public class Caverphone implements StringEncoder { |
||||
|
||||
/** |
||||
* Delegate to a {@link Caverphone2} instance to avoid code duplication. |
||||
*/ |
||||
final private Caverphone2 encoder = new Caverphone2(); |
||||
|
||||
/** |
||||
* Creates an instance of the Caverphone encoder |
||||
*/ |
||||
public Caverphone() { |
||||
super(); |
||||
} |
||||
|
||||
/** |
||||
* Encodes the given String into a Caverphone value. |
||||
* |
||||
* @param source |
||||
* String the source string |
||||
* @return A caverphone code for the given String |
||||
*/ |
||||
public String caverphone(final String source) { |
||||
return this.encoder.encode(source); |
||||
} |
||||
|
||||
/** |
||||
* Encodes an Object using the caverphone algorithm. This method is provided in order to satisfy the requirements of |
||||
* the Encoder interface, and will throw an EncoderException if the supplied object is not of type java.lang.String. |
||||
* |
||||
* @param obj |
||||
* Object to encode |
||||
* @return An object (or type java.lang.String) containing the caverphone code which corresponds to the String |
||||
* supplied. |
||||
* @throws EncoderException |
||||
* if the parameter supplied is not of type java.lang.String |
||||
*/ |
||||
@Override |
||||
public Object encode(final Object obj) throws EncoderException { |
||||
if (!(obj instanceof String)) { |
||||
throw new EncoderException("Parameter supplied to Caverphone encode is not of type java.lang.String"); |
||||
} |
||||
return this.caverphone((String) obj); |
||||
} |
||||
|
||||
/** |
||||
* Encodes a String using the Caverphone algorithm. |
||||
* |
||||
* @param str |
||||
* String object to encode |
||||
* @return The caverphone code corresponding to the String supplied |
||||
*/ |
||||
@Override |
||||
public String encode(final String str) { |
||||
return this.caverphone(str); |
||||
} |
||||
|
||||
/** |
||||
* Tests if the caverphones of two strings are identical. |
||||
* |
||||
* @param str1 |
||||
* First of two strings to compare |
||||
* @param str2 |
||||
* Second of two strings to compare |
||||
* @return <code>true</code> if the caverphones of these strings are identical, <code>false</code> otherwise. |
||||
*/ |
||||
public boolean isCaverphoneEqual(final String str1, final String str2) { |
||||
return this.caverphone(str1).equals(this.caverphone(str2)); |
||||
} |
||||
|
||||
} |
@ -0,0 +1,127 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.language; |
||||
|
||||
/** |
||||
* Encodes a string into a Caverphone 1.0 value. |
||||
* |
||||
* This is an algorithm created by the Caversham Project at the University of Otago. It implements the Caverphone 1.0 |
||||
* algorithm: |
||||
* |
||||
* @version $Id: Caverphone.java 1075947 2011-03-01 17:56:14Z ggregory $ |
||||
* @see <a href="http://en.wikipedia.org/wiki/Caverphone">Wikipedia - Caverphone</a> |
||||
* @see <a href="http://caversham.otago.ac.nz/files/working/ctp060902.pdf">Caverphone 1.0 specification</a> |
||||
* @since 1.5 |
||||
* |
||||
* <p>This class is immutable and thread-safe.</p> |
||||
*/ |
||||
public class Caverphone1 extends AbstractCaverphone { |
||||
|
||||
private static final String SIX_1 = "111111"; |
||||
|
||||
/** |
||||
* Encodes the given String into a Caverphone value. |
||||
* |
||||
* @param source |
||||
* String the source string |
||||
* @return A caverphone code for the given String |
||||
*/ |
||||
@Override |
||||
public String encode(final String source) { |
||||
String txt = source; |
||||
if (txt == null || txt.length() == 0) { |
||||
return SIX_1; |
||||
} |
||||
|
||||
// 1. Convert to lowercase
|
||||
txt = txt.toLowerCase(java.util.Locale.ENGLISH); |
||||
|
||||
// 2. Remove anything not A-Z
|
||||
txt = txt.replaceAll("[^a-z]", ""); |
||||
|
||||
// 3. Handle various start options
|
||||
// 2 is a temporary placeholder to indicate a consonant which we are no longer interested in.
|
||||
txt = txt.replaceAll("^cough", "cou2f"); |
||||
txt = txt.replaceAll("^rough", "rou2f"); |
||||
txt = txt.replaceAll("^tough", "tou2f"); |
||||
txt = txt.replaceAll("^enough", "enou2f"); |
||||
txt = txt.replaceAll("^gn", "2n"); |
||||
|
||||
// End
|
||||
txt = txt.replaceAll("mb$", "m2"); |
||||
|
||||
// 4. Handle replacements
|
||||
txt = txt.replaceAll("cq", "2q"); |
||||
txt = txt.replaceAll("ci", "si"); |
||||
txt = txt.replaceAll("ce", "se"); |
||||
txt = txt.replaceAll("cy", "sy"); |
||||
txt = txt.replaceAll("tch", "2ch"); |
||||
txt = txt.replaceAll("c", "k"); |
||||
txt = txt.replaceAll("q", "k"); |
||||
txt = txt.replaceAll("x", "k"); |
||||
txt = txt.replaceAll("v", "f"); |
||||
txt = txt.replaceAll("dg", "2g"); |
||||
txt = txt.replaceAll("tio", "sio"); |
||||
txt = txt.replaceAll("tia", "sia"); |
||||
txt = txt.replaceAll("d", "t"); |
||||
txt = txt.replaceAll("ph", "fh"); |
||||
txt = txt.replaceAll("b", "p"); |
||||
txt = txt.replaceAll("sh", "s2"); |
||||
txt = txt.replaceAll("z", "s"); |
||||
txt = txt.replaceAll("^[aeiou]", "A"); |
||||
// 3 is a temporary placeholder marking a vowel
|
||||
txt = txt.replaceAll("[aeiou]", "3"); |
||||
txt = txt.replaceAll("3gh3", "3kh3"); |
||||
txt = txt.replaceAll("gh", "22"); |
||||
txt = txt.replaceAll("g", "k"); |
||||
txt = txt.replaceAll("s+", "S"); |
||||
txt = txt.replaceAll("t+", "T"); |
||||
txt = txt.replaceAll("p+", "P"); |
||||
txt = txt.replaceAll("k+", "K"); |
||||
txt = txt.replaceAll("f+", "F"); |
||||
txt = txt.replaceAll("m+", "M"); |
||||
txt = txt.replaceAll("n+", "N"); |
||||
txt = txt.replaceAll("w3", "W3"); |
||||
txt = txt.replaceAll("wy", "Wy"); // 1.0 only
|
||||
txt = txt.replaceAll("wh3", "Wh3"); |
||||
txt = txt.replaceAll("why", "Why"); // 1.0 only
|
||||
txt = txt.replaceAll("w", "2"); |
||||
txt = txt.replaceAll("^h", "A"); |
||||
txt = txt.replaceAll("h", "2"); |
||||
txt = txt.replaceAll("r3", "R3"); |
||||
txt = txt.replaceAll("ry", "Ry"); // 1.0 only
|
||||
txt = txt.replaceAll("r", "2"); |
||||
txt = txt.replaceAll("l3", "L3"); |
||||
txt = txt.replaceAll("ly", "Ly"); // 1.0 only
|
||||
txt = txt.replaceAll("l", "2"); |
||||
txt = txt.replaceAll("j", "y"); // 1.0 only
|
||||
txt = txt.replaceAll("y3", "Y3"); // 1.0 only
|
||||
txt = txt.replaceAll("y", "2"); // 1.0 only
|
||||
|
||||
// 5. Handle removals
|
||||
txt = txt.replaceAll("2", ""); |
||||
txt = txt.replaceAll("3", ""); |
||||
|
||||
// 6. put ten 1s on the end
|
||||
txt = txt + SIX_1; |
||||
|
||||
// 7. take the first six characters as the code
|
||||
return txt.substring(0, SIX_1.length()); |
||||
} |
||||
|
||||
} |
@ -0,0 +1,131 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.language; |
||||
|
||||
/** |
||||
* Encodes a string into a Caverphone 2.0 value. |
||||
* |
||||
* This is an algorithm created by the Caversham Project at the University of Otago. It implements the Caverphone 2.0 |
||||
* algorithm: |
||||
* |
||||
* @version $Id: Caverphone.java 1075947 2011-03-01 17:56:14Z ggregory $ |
||||
* @see <a href="http://en.wikipedia.org/wiki/Caverphone">Wikipedia - Caverphone</a> |
||||
* @see <a href="http://caversham.otago.ac.nz/files/working/ctp150804.pdf">Caverphone 2.0 specification</a> |
||||
* @since 1.5 |
||||
* |
||||
* <p>This class is immutable and thread-safe.</p> |
||||
*/ |
||||
public class Caverphone2 extends AbstractCaverphone { |
||||
|
||||
private static final String TEN_1 = "1111111111"; |
||||
|
||||
/** |
||||
* Encodes the given String into a Caverphone 2.0 value. |
||||
* |
||||
* @param source |
||||
* String the source string |
||||
* @return A caverphone code for the given String |
||||
*/ |
||||
@Override |
||||
public String encode(final String source) { |
||||
String txt = source; |
||||
if (txt == null || txt.length() == 0) { |
||||
return TEN_1; |
||||
} |
||||
|
||||
// 1. Convert to lowercase
|
||||
txt = txt.toLowerCase(java.util.Locale.ENGLISH); |
||||
|
||||
// 2. Remove anything not A-Z
|
||||
txt = txt.replaceAll("[^a-z]", ""); |
||||
|
||||
// 2.5. Remove final e
|
||||
txt = txt.replaceAll("e$", ""); // 2.0 only
|
||||
|
||||
// 3. Handle various start options
|
||||
txt = txt.replaceAll("^cough", "cou2f"); |
||||
txt = txt.replaceAll("^rough", "rou2f"); |
||||
txt = txt.replaceAll("^tough", "tou2f"); |
||||
txt = txt.replaceAll("^enough", "enou2f"); // 2.0 only
|
||||
txt = txt.replaceAll("^trough", "trou2f"); // 2.0 only
|
||||
// note the spec says ^enough here again, c+p error I assume
|
||||
txt = txt.replaceAll("^gn", "2n"); |
||||
|
||||
// End
|
||||
txt = txt.replaceAll("mb$", "m2"); |
||||
|
||||
// 4. Handle replacements
|
||||
txt = txt.replaceAll("cq", "2q"); |
||||
txt = txt.replaceAll("ci", "si"); |
||||
txt = txt.replaceAll("ce", "se"); |
||||
txt = txt.replaceAll("cy", "sy"); |
||||
txt = txt.replaceAll("tch", "2ch"); |
||||
txt = txt.replaceAll("c", "k"); |
||||
txt = txt.replaceAll("q", "k"); |
||||
txt = txt.replaceAll("x", "k"); |
||||
txt = txt.replaceAll("v", "f"); |
||||
txt = txt.replaceAll("dg", "2g"); |
||||
txt = txt.replaceAll("tio", "sio"); |
||||
txt = txt.replaceAll("tia", "sia"); |
||||
txt = txt.replaceAll("d", "t"); |
||||
txt = txt.replaceAll("ph", "fh"); |
||||
txt = txt.replaceAll("b", "p"); |
||||
txt = txt.replaceAll("sh", "s2"); |
||||
txt = txt.replaceAll("z", "s"); |
||||
txt = txt.replaceAll("^[aeiou]", "A"); |
||||
txt = txt.replaceAll("[aeiou]", "3"); |
||||
txt = txt.replaceAll("j", "y"); // 2.0 only
|
||||
txt = txt.replaceAll("^y3", "Y3"); // 2.0 only
|
||||
txt = txt.replaceAll("^y", "A"); // 2.0 only
|
||||
txt = txt.replaceAll("y", "3"); // 2.0 only
|
||||
txt = txt.replaceAll("3gh3", "3kh3"); |
||||
txt = txt.replaceAll("gh", "22"); |
||||
txt = txt.replaceAll("g", "k"); |
||||
txt = txt.replaceAll("s+", "S"); |
||||
txt = txt.replaceAll("t+", "T"); |
||||
txt = txt.replaceAll("p+", "P"); |
||||
txt = txt.replaceAll("k+", "K"); |
||||
txt = txt.replaceAll("f+", "F"); |
||||
txt = txt.replaceAll("m+", "M"); |
||||
txt = txt.replaceAll("n+", "N"); |
||||
txt = txt.replaceAll("w3", "W3"); |
||||
txt = txt.replaceAll("wh3", "Wh3"); |
||||
txt = txt.replaceAll("w$", "3"); // 2.0 only
|
||||
txt = txt.replaceAll("w", "2"); |
||||
txt = txt.replaceAll("^h", "A"); |
||||
txt = txt.replaceAll("h", "2"); |
||||
txt = txt.replaceAll("r3", "R3"); |
||||
txt = txt.replaceAll("r$", "3"); // 2.0 only
|
||||
txt = txt.replaceAll("r", "2"); |
||||
txt = txt.replaceAll("l3", "L3"); |
||||
txt = txt.replaceAll("l$", "3"); // 2.0 only
|
||||
txt = txt.replaceAll("l", "2"); |
||||
|
||||
// 5. Handle removals
|
||||
txt = txt.replaceAll("2", ""); |
||||
txt = txt.replaceAll("3$", "A"); // 2.0 only
|
||||
txt = txt.replaceAll("3", ""); |
||||
|
||||
// 6. put ten 1s on the end
|
||||
txt = txt + TEN_1; |
||||
|
||||
// 7. take the first ten characters as the code
|
||||
return txt.substring(0, TEN_1.length()); |
||||
} |
||||
|
||||
} |
@ -0,0 +1,445 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.language; |
||||
|
||||
import java.util.Locale; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.EncoderException; |
||||
import com.fr.third.org.apache.commons.codec.StringEncoder; |
||||
|
||||
/** |
||||
* Encodes a string into a Cologne Phonetic value. |
||||
* <p> |
||||
* Implements the <a href="http://de.wikipedia.org/wiki/K%C3%B6lner_Phonetik">Kölner Phonetik</a> (Cologne |
||||
* Phonetic) algorithm issued by Hans Joachim Postel in 1969. |
||||
* </p> |
||||
* <p> |
||||
* The <i>Kölner Phonetik</i> is a phonetic algorithm which is optimized for the German language. It is related to |
||||
* the well-known soundex algorithm. |
||||
* </p> |
||||
* |
||||
* <h2>Algorithm</h2> |
||||
* |
||||
* <ul> |
||||
* |
||||
* <li> |
||||
* <h3>Step 1:</h3> |
||||
* After preprocessing (conversion to upper case, transcription of <a |
||||
* href="http://en.wikipedia.org/wiki/Germanic_umlaut">germanic umlauts</a>, removal of non alphabetical characters) the |
||||
* letters of the supplied text are replaced by their phonetic code according to the following table. |
||||
* <table border="1"> |
||||
* <caption style="caption-side: bottom"><small><i>(Source: <a |
||||
* href="http://de.wikipedia.org/wiki/K%C3%B6lner_Phonetik#Buchstabencodes">Wikipedia (de): Kölner Phonetik -- |
||||
* Buchstabencodes</a>)</i></small></caption> <tbody> |
||||
* <tr> |
||||
* <th>Letter</th> |
||||
* <th>Context</th> |
||||
* <th align="center">Code</th> |
||||
* </tr> |
||||
* <tr> |
||||
* <td>A, E, I, J, O, U, Y</td> |
||||
* <td></td> |
||||
* <td align="center">0</td> |
||||
* </tr> |
||||
* <tr> |
||||
* |
||||
* <td>H</td> |
||||
* <td></td> |
||||
* <td align="center">-</td> |
||||
* </tr> |
||||
* <tr> |
||||
* <td>B</td> |
||||
* <td></td> |
||||
* <td rowspan="2" align="center">1</td> |
||||
* </tr> |
||||
* <tr> |
||||
* <td>P</td> |
||||
* <td>not before H</td> |
||||
* |
||||
* </tr> |
||||
* <tr> |
||||
* <td>D, T</td> |
||||
* <td>not before C, S, Z</td> |
||||
* <td align="center">2</td> |
||||
* </tr> |
||||
* <tr> |
||||
* <td>F, V, W</td> |
||||
* <td></td> |
||||
* <td rowspan="2" align="center">3</td> |
||||
* </tr> |
||||
* <tr> |
||||
* |
||||
* <td>P</td> |
||||
* <td>before H</td> |
||||
* </tr> |
||||
* <tr> |
||||
* <td>G, K, Q</td> |
||||
* <td></td> |
||||
* <td rowspan="3" align="center">4</td> |
||||
* </tr> |
||||
* <tr> |
||||
* <td rowspan="2">C</td> |
||||
* <td>at onset before A, H, K, L, O, Q, R, U, X</td> |
||||
* |
||||
* </tr> |
||||
* <tr> |
||||
* <td>before A, H, K, O, Q, U, X except after S, Z</td> |
||||
* </tr> |
||||
* <tr> |
||||
* <td>X</td> |
||||
* <td>not after C, K, Q</td> |
||||
* <td align="center">48</td> |
||||
* </tr> |
||||
* <tr> |
||||
* <td>L</td> |
||||
* <td></td> |
||||
* |
||||
* <td align="center">5</td> |
||||
* </tr> |
||||
* <tr> |
||||
* <td>M, N</td> |
||||
* <td></td> |
||||
* <td align="center">6</td> |
||||
* </tr> |
||||
* <tr> |
||||
* <td>R</td> |
||||
* <td></td> |
||||
* <td align="center">7</td> |
||||
* </tr> |
||||
* |
||||
* <tr> |
||||
* <td>S, Z</td> |
||||
* <td></td> |
||||
* <td rowspan="6" align="center">8</td> |
||||
* </tr> |
||||
* <tr> |
||||
* <td rowspan="3">C</td> |
||||
* <td>after S, Z</td> |
||||
* </tr> |
||||
* <tr> |
||||
* <td>at onset except before A, H, K, L, O, Q, R, U, X</td> |
||||
* </tr> |
||||
* |
||||
* <tr> |
||||
* <td>not before A, H, K, O, Q, U, X</td> |
||||
* </tr> |
||||
* <tr> |
||||
* <td>D, T</td> |
||||
* <td>before C, S, Z</td> |
||||
* </tr> |
||||
* <tr> |
||||
* <td>X</td> |
||||
* <td>after C, K, Q</td> |
||||
* </tr> |
||||
* </tbody> |
||||
* </table> |
||||
* |
||||
* <h4>Example:</h4> |
||||
* |
||||
* <code>"M</code>ü<code>ller-L</code>ü |
||||
* <code>denscheidt" => "MULLERLUDENSCHEIDT" => "6005507500206880022"</code> |
||||
* |
||||
* </li> |
||||
* |
||||
* <li> |
||||
* <h3>Step 2:</h3> |
||||
* Collapse of all multiple consecutive code digits. |
||||
* <h4>Example:</h4> |
||||
* <code>"6005507500206880022" => "6050750206802"</code></li> |
||||
* |
||||
* <li> |
||||
* <h3>Step 3:</h3> |
||||
* Removal of all codes "0" except at the beginning. This means that two or more identical consecutive digits can occur |
||||
* if they occur after removing the "0" digits. |
||||
* |
||||
* <h4>Example:</h4> |
||||
* <code>"6050750206802" => "65752682"</code></li> |
||||
* |
||||
* </ul> |
||||
* |
||||
* <p> |
||||
* This class is thread-safe. |
||||
* </p> |
||||
* |
||||
* @see <a href="http://de.wikipedia.org/wiki/K%C3%B6lner_Phonetik">Wikipedia (de): Kölner Phonetik (in German)</a> |
||||
* @since 1.5 |
||||
*/ |
||||
public class ColognePhonetic implements StringEncoder { |
||||
|
||||
// Predefined char arrays for better performance and less GC load
|
||||
private static final char[] AEIJOUY = new char[] { 'A', 'E', 'I', 'J', 'O', 'U', 'Y' }; |
||||
private static final char[] SCZ = new char[] { 'S', 'C', 'Z' }; |
||||
private static final char[] WFPV = new char[] { 'W', 'F', 'P', 'V' }; |
||||
private static final char[] GKQ = new char[] { 'G', 'K', 'Q' }; |
||||
private static final char[] CKQ = new char[] { 'C', 'K', 'Q' }; |
||||
private static final char[] AHKLOQRUX = new char[] { 'A', 'H', 'K', 'L', 'O', 'Q', 'R', 'U', 'X' }; |
||||
private static final char[] SZ = new char[] { 'S', 'Z' }; |
||||
private static final char[] AHOUKQX = new char[] { 'A', 'H', 'O', 'U', 'K', 'Q', 'X' }; |
||||
private static final char[] TDX = new char[] { 'T', 'D', 'X' }; |
||||
|
||||
/** |
||||
* This class is not thread-safe; the field {@link #length} is mutable. |
||||
* However, it is not shared between threads, as it is constructed on demand |
||||
* by the method {@link ColognePhonetic#colognePhonetic(String)} |
||||
*/ |
||||
private abstract class CologneBuffer { |
||||
|
||||
protected final char[] data; |
||||
|
||||
protected int length = 0; |
||||
|
||||
public CologneBuffer(final char[] data) { |
||||
this.data = data; |
||||
this.length = data.length; |
||||
} |
||||
|
||||
public CologneBuffer(final int buffSize) { |
||||
this.data = new char[buffSize]; |
||||
this.length = 0; |
||||
} |
||||
|
||||
protected abstract char[] copyData(int start, final int length); |
||||
|
||||
public int length() { |
||||
return length; |
||||
} |
||||
|
||||
@Override |
||||
public String toString() { |
||||
return new String(copyData(0, length)); |
||||
} |
||||
} |
||||
|
||||
private class CologneOutputBuffer extends CologneBuffer { |
||||
|
||||
public CologneOutputBuffer(final int buffSize) { |
||||
super(buffSize); |
||||
} |
||||
|
||||
public void addRight(final char chr) { |
||||
data[length] = chr; |
||||
length++; |
||||
} |
||||
|
||||
@Override |
||||
protected char[] copyData(final int start, final int length) { |
||||
final char[] newData = new char[length]; |
||||
System.arraycopy(data, start, newData, 0, length); |
||||
return newData; |
||||
} |
||||
} |
||||
|
||||
private class CologneInputBuffer extends CologneBuffer { |
||||
|
||||
public CologneInputBuffer(final char[] data) { |
||||
super(data); |
||||
} |
||||
|
||||
public void addLeft(final char ch) { |
||||
length++; |
||||
data[getNextPos()] = ch; |
||||
} |
||||
|
||||
@Override |
||||
protected char[] copyData(final int start, final int length) { |
||||
final char[] newData = new char[length]; |
||||
System.arraycopy(data, data.length - this.length + start, newData, 0, length); |
||||
return newData; |
||||
} |
||||
|
||||
public char getNextChar() { |
||||
return data[getNextPos()]; |
||||
} |
||||
|
||||
protected int getNextPos() { |
||||
return data.length - length; |
||||
} |
||||
|
||||
public char removeNext() { |
||||
final char ch = getNextChar(); |
||||
length--; |
||||
return ch; |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Maps some Germanic characters to plain for internal processing. The following characters are mapped: |
||||
* <ul> |
||||
* <li>capital a, umlaut mark</li> |
||||
* <li>capital u, umlaut mark</li> |
||||
* <li>capital o, umlaut mark</li> |
||||
* <li>small sharp s, German</li> |
||||
* </ul> |
||||
*/ |
||||
private static final char[][] PREPROCESS_MAP = new char[][]{ |
||||
{'\u00C4', 'A'}, // capital a, umlaut mark
|
||||
{'\u00DC', 'U'}, // capital u, umlaut mark
|
||||
{'\u00D6', 'O'}, // capital o, umlaut mark
|
||||
{'\u00DF', 'S'} // small sharp s, German
|
||||
}; |
||||
|
||||
/* |
||||
* Returns whether the array contains the key, or not. |
||||
*/ |
||||
private static boolean arrayContains(final char[] arr, final char key) { |
||||
for (final char element : arr) { |
||||
if (element == key) { |
||||
return true; |
||||
} |
||||
} |
||||
return false; |
||||
} |
||||
|
||||
/** |
||||
* <p> |
||||
* Implements the <i>Kölner Phonetik</i> algorithm. |
||||
* </p> |
||||
* <p> |
||||
* In contrast to the initial description of the algorithm, this implementation does the encoding in one pass. |
||||
* </p> |
||||
* |
||||
* @param text The source text to encode |
||||
* @return the corresponding encoding according to the <i>Kölner Phonetik</i> algorithm |
||||
*/ |
||||
public String colognePhonetic(String text) { |
||||
if (text == null) { |
||||
return null; |
||||
} |
||||
|
||||
text = preprocess(text); |
||||
|
||||
final CologneOutputBuffer output = new CologneOutputBuffer(text.length() * 2); |
||||
final CologneInputBuffer input = new CologneInputBuffer(text.toCharArray()); |
||||
|
||||
char nextChar; |
||||
|
||||
char lastChar = '-'; |
||||
char lastCode = '/'; |
||||
char code; |
||||
char chr; |
||||
|
||||
int rightLength = input.length(); |
||||
|
||||
while (rightLength > 0) { |
||||
chr = input.removeNext(); |
||||
|
||||
if ((rightLength = input.length()) > 0) { |
||||
nextChar = input.getNextChar(); |
||||
} else { |
||||
nextChar = '-'; |
||||
} |
||||
|
||||
if (arrayContains(AEIJOUY, chr)) { |
||||
code = '0'; |
||||
} else if (chr == 'H' || chr < 'A' || chr > 'Z') { |
||||
if (lastCode == '/') { |
||||
continue; |
||||
} |
||||
code = '-'; |
||||
} else if (chr == 'B' || (chr == 'P' && nextChar != 'H')) { |
||||
code = '1'; |
||||
} else if ((chr == 'D' || chr == 'T') && !arrayContains(SCZ, nextChar)) { |
||||
code = '2'; |
||||
} else if (arrayContains(WFPV, chr)) { |
||||
code = '3'; |
||||
} else if (arrayContains(GKQ, chr)) { |
||||
code = '4'; |
||||
} else if (chr == 'X' && !arrayContains(CKQ, lastChar)) { |
||||
code = '4'; |
||||
input.addLeft('S'); |
||||
rightLength++; |
||||
} else if (chr == 'S' || chr == 'Z') { |
||||
code = '8'; |
||||
} else if (chr == 'C') { |
||||
if (lastCode == '/') { |
||||
if (arrayContains(AHKLOQRUX, nextChar)) { |
||||
code = '4'; |
||||
} else { |
||||
code = '8'; |
||||
} |
||||
} else { |
||||
if (arrayContains(SZ, lastChar) || !arrayContains(AHOUKQX, nextChar)) { |
||||
code = '8'; |
||||
} else { |
||||
code = '4'; |
||||
} |
||||
} |
||||
} else if (arrayContains(TDX, chr)) { |
||||
code = '8'; |
||||
} else if (chr == 'R') { |
||||
code = '7'; |
||||
} else if (chr == 'L') { |
||||
code = '5'; |
||||
} else if (chr == 'M' || chr == 'N') { |
||||
code = '6'; |
||||
} else { |
||||
code = chr; |
||||
} |
||||
|
||||
if (code != '-' && (lastCode != code && (code != '0' || lastCode == '/') || code < '0' || code > '8')) { |
||||
output.addRight(code); |
||||
} |
||||
|
||||
lastChar = chr; |
||||
lastCode = code; |
||||
} |
||||
return output.toString(); |
||||
} |
||||
|
||||
@Override |
||||
public Object encode(final Object object) throws EncoderException { |
||||
if (!(object instanceof String)) { |
||||
throw new EncoderException("This method's parameter was expected to be of the type " + |
||||
String.class.getName() + |
||||
". But actually it was of the type " + |
||||
object.getClass().getName() + |
||||
"."); |
||||
} |
||||
return encode((String) object); |
||||
} |
||||
|
||||
@Override |
||||
public String encode(final String text) { |
||||
return colognePhonetic(text); |
||||
} |
||||
|
||||
public boolean isEncodeEqual(final String text1, final String text2) { |
||||
return colognePhonetic(text1).equals(colognePhonetic(text2)); |
||||
} |
||||
|
||||
/** |
||||
* Converts the string to upper case and replaces germanic characters as defined in {@link #PREPROCESS_MAP}. |
||||
*/ |
||||
private String preprocess(String text) { |
||||
text = text.toUpperCase(Locale.GERMAN); |
||||
|
||||
final char[] chrs = text.toCharArray(); |
||||
|
||||
for (int index = 0; index < chrs.length; index++) { |
||||
if (chrs[index] > 'Z') { |
||||
for (final char[] element : PREPROCESS_MAP) { |
||||
if (chrs[index] == element[0]) { |
||||
chrs[index] = element[1]; |
||||
break; |
||||
} |
||||
} |
||||
} |
||||
} |
||||
return new String(chrs); |
||||
} |
||||
} |
@ -0,0 +1,562 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
package com.fr.third.org.apache.commons.codec.language; |
||||
|
||||
import java.io.InputStream; |
||||
import java.util.ArrayList; |
||||
import java.util.Arrays; |
||||
import java.util.Collections; |
||||
import java.util.Comparator; |
||||
import java.util.HashMap; |
||||
import java.util.LinkedHashSet; |
||||
import java.util.List; |
||||
import java.util.Map; |
||||
import java.util.Scanner; |
||||
import java.util.Set; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.CharEncoding; |
||||
import com.fr.third.org.apache.commons.codec.EncoderException; |
||||
import com.fr.third.org.apache.commons.codec.StringEncoder; |
||||
|
||||
/** |
||||
* Encodes a string into a Daitch-Mokotoff Soundex value. |
||||
* <p> |
||||
* The Daitch-Mokotoff Soundex algorithm is a refinement of the Russell and American Soundex algorithms, yielding greater |
||||
* accuracy in matching especially Slavic and Yiddish surnames with similar pronunciation but differences in spelling. |
||||
* </p> |
||||
* <p> |
||||
* The main differences compared to the other soundex variants are: |
||||
* </p> |
||||
* <ul> |
||||
* <li>coded names are 6 digits long |
||||
* <li>the initial character of the name is coded |
||||
* <li>rules to encode multi-character n-grams |
||||
* <li>multiple possible encodings for the same name (branching) |
||||
* </ul> |
||||
* <p> |
||||
* This implementation supports branching, depending on the used method: |
||||
* <ul> |
||||
* <li>{@link #encode(String)} - branching disabled, only the first code will be returned |
||||
* <li>{@link #soundex(String)} - branching enabled, all codes will be returned, separated by '|' |
||||
* </ul> |
||||
* <p> |
||||
* Note: this implementation has additional branching rules compared to the original description of the algorithm. The |
||||
* rules can be customized by overriding the default rules contained in the resource file |
||||
* {@code com/fr/third/org/apache/commons/codec/language/dmrules.txt}. |
||||
* </p> |
||||
* <p> |
||||
* This class is thread-safe. |
||||
* </p> |
||||
* |
||||
* @see Soundex |
||||
* @see <a href="http://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_Soundex"> Wikipedia - Daitch-Mokotoff Soundex</a> |
||||
* @see <a href="http://www.avotaynu.com/soundex.htm">Avotaynu - Soundexing and Genealogy</a> |
||||
* |
||||
* @version $Id: DaitchMokotoffSoundex.java 1760691 2016-09-14 12:14:26Z jochen $ |
||||
* @since 1.10 |
||||
*/ |
||||
public class DaitchMokotoffSoundex implements StringEncoder { |
||||
|
||||
/** |
||||
* Inner class representing a branch during DM soundex encoding. |
||||
*/ |
||||
private static final class Branch { |
||||
private final StringBuilder builder; |
||||
private String cachedString; |
||||
private String lastReplacement; |
||||
|
||||
private Branch() { |
||||
builder = new StringBuilder(); |
||||
lastReplacement = null; |
||||
cachedString = null; |
||||
} |
||||
|
||||
/** |
||||
* Creates a new branch, identical to this branch. |
||||
* |
||||
* @return a new, identical branch |
||||
*/ |
||||
public Branch createBranch() { |
||||
final Branch branch = new Branch(); |
||||
branch.builder.append(toString()); |
||||
branch.lastReplacement = this.lastReplacement; |
||||
return branch; |
||||
} |
||||
|
||||
@Override |
||||
public boolean equals(final Object other) { |
||||
if (this == other) { |
||||
return true; |
||||
} |
||||
if (!(other instanceof Branch)) { |
||||
return false; |
||||
} |
||||
|
||||
return toString().equals(((Branch) other).toString()); |
||||
} |
||||
|
||||
/** |
||||
* Finish this branch by appending '0's until the maximum code length has been reached. |
||||
*/ |
||||
public void finish() { |
||||
while (builder.length() < MAX_LENGTH) { |
||||
builder.append('0'); |
||||
cachedString = null; |
||||
} |
||||
} |
||||
|
||||
@Override |
||||
public int hashCode() { |
||||
return toString().hashCode(); |
||||
} |
||||
|
||||
/** |
||||
* Process the next replacement to be added to this branch. |
||||
* |
||||
* @param replacement |
||||
* the next replacement to append |
||||
* @param forceAppend |
||||
* indicates if the default processing shall be overridden |
||||
*/ |
||||
public void processNextReplacement(final String replacement, final boolean forceAppend) { |
||||
final boolean append = lastReplacement == null || !lastReplacement.endsWith(replacement) || forceAppend; |
||||
|
||||
if (append && builder.length() < MAX_LENGTH) { |
||||
builder.append(replacement); |
||||
// remove all characters after the maximum length
|
||||
if (builder.length() > MAX_LENGTH) { |
||||
builder.delete(MAX_LENGTH, builder.length()); |
||||
} |
||||
cachedString = null; |
||||
} |
||||
|
||||
lastReplacement = replacement; |
||||
} |
||||
|
||||
@Override |
||||
public String toString() { |
||||
if (cachedString == null) { |
||||
cachedString = builder.toString(); |
||||
} |
||||
return cachedString; |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Inner class for storing rules. |
||||
*/ |
||||
private static final class Rule { |
||||
private final String pattern; |
||||
private final String[] replacementAtStart; |
||||
private final String[] replacementBeforeVowel; |
||||
private final String[] replacementDefault; |
||||
|
||||
protected Rule(final String pattern, final String replacementAtStart, final String replacementBeforeVowel, |
||||
final String replacementDefault) { |
||||
this.pattern = pattern; |
||||
this.replacementAtStart = replacementAtStart.split("\\|"); |
||||
this.replacementBeforeVowel = replacementBeforeVowel.split("\\|"); |
||||
this.replacementDefault = replacementDefault.split("\\|"); |
||||
} |
||||
|
||||
public int getPatternLength() { |
||||
return pattern.length(); |
||||
} |
||||
|
||||
public String[] getReplacements(final String context, final boolean atStart) { |
||||
if (atStart) { |
||||
return replacementAtStart; |
||||
} |
||||
|
||||
final int nextIndex = getPatternLength(); |
||||
final boolean nextCharIsVowel = nextIndex < context.length() ? isVowel(context.charAt(nextIndex)) : false; |
||||
if (nextCharIsVowel) { |
||||
return replacementBeforeVowel; |
||||
} |
||||
|
||||
return replacementDefault; |
||||
} |
||||
|
||||
private boolean isVowel(final char ch) { |
||||
return ch == 'a' || ch == 'e' || ch == 'i' || ch == 'o' || ch == 'u'; |
||||
} |
||||
|
||||
public boolean matches(final String context) { |
||||
return context.startsWith(pattern); |
||||
} |
||||
|
||||
@Override |
||||
public String toString() { |
||||
return String.format("%s=(%s,%s,%s)", pattern, Arrays.asList(replacementAtStart), |
||||
Arrays.asList(replacementBeforeVowel), Arrays.asList(replacementDefault)); |
||||
} |
||||
} |
||||
|
||||
private static final String COMMENT = "//"; |
||||
private static final String DOUBLE_QUOTE = "\""; |
||||
|
||||
private static final String MULTILINE_COMMENT_END = "*/"; |
||||
|
||||
private static final String MULTILINE_COMMENT_START = "/*"; |
||||
|
||||
/** The resource file containing the replacement and folding rules */ |
||||
private static final String RESOURCE_FILE = "com/fr/third/org/apache/commons/codec/language/dmrules.txt"; |
||||
|
||||
/** The code length of a DM soundex value. */ |
||||
private static final int MAX_LENGTH = 6; |
||||
|
||||
/** Transformation rules indexed by the first character of their pattern. */ |
||||
private static final Map<Character, List<Rule>> RULES = new HashMap<Character, List<Rule>>(); |
||||
|
||||
/** Folding rules. */ |
||||
private static final Map<Character, Character> FOLDINGS = new HashMap<Character, Character>(); |
||||
|
||||
static {
    // Load the rule resource via the class loader of this class; a missing resource is fatal.
    final ClassLoader loader = DaitchMokotoffSoundex.class.getClassLoader();
    final InputStream rulesIS = loader.getResourceAsStream(RESOURCE_FILE);
    if (rulesIS == null) {
        throw new IllegalArgumentException("Unable to load resource: " + RESOURCE_FILE);
    }

    final Scanner scanner = new Scanner(rulesIS, CharEncoding.UTF_8);
    try {
        parseRules(scanner, RESOURCE_FILE, RULES, FOLDINGS);
    } finally {
        scanner.close();
    }

    // Order every rule list so that rules with longer patterns come first.
    final Comparator<Rule> longestPatternFirst = new Comparator<Rule>() {
        @Override
        public int compare(final Rule rule1, final Rule rule2) {
            return rule2.getPatternLength() - rule1.getPatternLength();
        }
    };
    for (final List<Rule> ruleList : RULES.values()) {
        Collections.sort(ruleList, longestPatternFirst);
    }
}
||||
|
||||
private static void parseRules(final Scanner scanner, final String location, |
||||
final Map<Character, List<Rule>> ruleMapping, final Map<Character, Character> asciiFoldings) { |
||||
int currentLine = 0; |
||||
boolean inMultilineComment = false; |
||||
|
||||
while (scanner.hasNextLine()) { |
||||
currentLine++; |
||||
final String rawLine = scanner.nextLine(); |
||||
String line = rawLine; |
||||
|
||||
if (inMultilineComment) { |
||||
if (line.endsWith(MULTILINE_COMMENT_END)) { |
||||
inMultilineComment = false; |
||||
} |
||||
continue; |
||||
} |
||||
|
||||
if (line.startsWith(MULTILINE_COMMENT_START)) { |
||||
inMultilineComment = true; |
||||
} else { |
||||
// discard comments
|
||||
final int cmtI = line.indexOf(COMMENT); |
||||
if (cmtI >= 0) { |
||||
line = line.substring(0, cmtI); |
||||
} |
||||
|
||||
// trim leading-trailing whitespace
|
||||
line = line.trim(); |
||||
|
||||
if (line.length() == 0) { |
||||
continue; // empty lines can be safely skipped
|
||||
} |
||||
|
||||
if (line.contains("=")) { |
||||
// folding
|
||||
final String[] parts = line.split("="); |
||||
if (parts.length != 2) { |
||||
throw new IllegalArgumentException("Malformed folding statement split into " + parts.length + |
||||
" parts: " + rawLine + " in " + location); |
||||
} |
||||
final String leftCharacter = parts[0]; |
||||
final String rightCharacter = parts[1]; |
||||
|
||||
if (leftCharacter.length() != 1 || rightCharacter.length() != 1) { |
||||
throw new IllegalArgumentException("Malformed folding statement - " + |
||||
"patterns are not single characters: " + rawLine + " in " + location); |
||||
} |
||||
|
||||
asciiFoldings.put(leftCharacter.charAt(0), rightCharacter.charAt(0)); |
||||
} else { |
||||
// rule
|
||||
final String[] parts = line.split("\\s+"); |
||||
if (parts.length != 4) { |
||||
throw new IllegalArgumentException("Malformed rule statement split into " + parts.length + |
||||
" parts: " + rawLine + " in " + location); |
||||
} |
||||
try { |
||||
final String pattern = stripQuotes(parts[0]); |
||||
final String replacement1 = stripQuotes(parts[1]); |
||||
final String replacement2 = stripQuotes(parts[2]); |
||||
final String replacement3 = stripQuotes(parts[3]); |
||||
|
||||
final Rule r = new Rule(pattern, replacement1, replacement2, replacement3); |
||||
final char patternKey = r.pattern.charAt(0); |
||||
List<Rule> rules = ruleMapping.get(patternKey); |
||||
if (rules == null) { |
||||
rules = new ArrayList<Rule>(); |
||||
ruleMapping.put(patternKey, rules); |
||||
} |
||||
rules.add(r); |
||||
} catch (final IllegalArgumentException e) { |
||||
throw new IllegalStateException( |
||||
"Problem parsing line '" + currentLine + "' in " + location, e); |
||||
} |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
private static String stripQuotes(String str) { |
||||
if (str.startsWith(DOUBLE_QUOTE)) { |
||||
str = str.substring(1); |
||||
} |
||||
|
||||
if (str.endsWith(DOUBLE_QUOTE)) { |
||||
str = str.substring(0, str.length() - 1); |
||||
} |
||||
|
||||
return str; |
||||
} |
||||
|
||||
/** Whether to use ASCII folding prior to encoding. */ |
||||
private final boolean folding; |
||||
|
||||
/** |
||||
* Creates a new instance with ASCII-folding enabled. |
||||
*/ |
||||
public DaitchMokotoffSoundex() {
    // default configuration: folding switched on
    this(true);
}

/**
 * Creates a new instance, choosing whether the input is ASCII-folded before it is encoded.
 * <p>
 * With folding enabled, characters that have an entry in the folding rules are replaced by their mapped
 * counterpart before encoding, e.g. &egrave; -&gt; e.
 * </p>
 *
 * @param folding
 *            {@code true} to apply ASCII folding before encoding, {@code false} to skip it
 */
public DaitchMokotoffSoundex(final boolean folding) {
    this.folding = folding;
}
||||
|
||||
/** |
||||
* Performs a cleanup of the input string before the actual soundex transformation. |
||||
* <p> |
||||
* Removes all whitespace characters and performs ASCII folding if enabled. |
||||
* </p> |
||||
* |
||||
* @param input |
||||
* the input string to cleanup |
||||
* @return a cleaned up string |
||||
*/ |
||||
private String cleanup(final String input) { |
||||
final StringBuilder sb = new StringBuilder(); |
||||
for (char ch : input.toCharArray()) { |
||||
if (Character.isWhitespace(ch)) { |
||||
continue; |
||||
} |
||||
|
||||
ch = Character.toLowerCase(ch); |
||||
if (folding && FOLDINGS.containsKey(ch)) { |
||||
ch = FOLDINGS.get(ch); |
||||
} |
||||
sb.append(ch); |
||||
} |
||||
return sb.toString(); |
||||
} |
||||
|
||||
/** |
||||
* Encodes an Object using the Daitch-Mokotoff soundex algorithm without branching. |
||||
* <p> |
||||
* This method is provided in order to satisfy the requirements of the Encoder interface, and will throw an |
||||
* EncoderException if the supplied object is not of type java.lang.String. |
||||
* </p> |
||||
* |
||||
* @see #soundex(String) |
||||
* |
||||
* @param obj |
||||
* Object to encode |
||||
* @return An object (of type java.lang.String) containing the DM soundex code, which corresponds to the String |
||||
* supplied. |
||||
* @throws EncoderException |
||||
* if the parameter supplied is not of type java.lang.String |
||||
* @throws IllegalArgumentException |
||||
* if a character is not mapped |
||||
*/ |
||||
@Override |
||||
public Object encode(final Object obj) throws EncoderException { |
||||
if (!(obj instanceof String)) { |
||||
throw new EncoderException( |
||||
"Parameter supplied to DaitchMokotoffSoundex encode is not of type java.lang.String"); |
||||
} |
||||
return encode((String) obj); |
||||
} |
||||
|
||||
/** |
||||
* Encodes a String using the Daitch-Mokotoff soundex algorithm without branching. |
||||
* |
||||
* @see #soundex(String) |
||||
* |
||||
* @param source |
||||
* A String object to encode |
||||
* @return A DM Soundex code corresponding to the String supplied |
||||
* @throws IllegalArgumentException |
||||
* if a character is not mapped |
||||
*/ |
||||
@Override |
||||
public String encode(final String source) { |
||||
if (source == null) { |
||||
return null; |
||||
} |
||||
return soundex(source, false)[0]; |
||||
} |
||||
|
||||
/** |
||||
* Encodes a String using the Daitch-Mokotoff soundex algorithm with branching. |
||||
* <p> |
||||
* In case a string is encoded into multiple codes (see branching rules), the result will contain all codes, |
||||
* separated by '|'. |
||||
* </p> |
||||
* <p> |
||||
* Example: the name "AUERBACH" is encoded as both |
||||
* </p> |
||||
* <ul> |
||||
* <li>097400</li> |
||||
* <li>097500</li> |
||||
* </ul> |
||||
* <p> |
||||
* Thus the result will be "097400|097500". |
||||
* </p> |
||||
* |
||||
* @param source |
||||
* A String object to encode |
||||
* @return A string containing a set of DM Soundex codes corresponding to the String supplied |
||||
* @throws IllegalArgumentException |
||||
* if a character is not mapped |
||||
*/ |
||||
public String soundex(final String source) { |
||||
final String[] branches = soundex(source, true); |
||||
final StringBuilder sb = new StringBuilder(); |
||||
int index = 0; |
||||
for (final String branch : branches) { |
||||
sb.append(branch); |
||||
if (++index < branches.length) { |
||||
sb.append('|'); |
||||
} |
||||
} |
||||
return sb.toString(); |
||||
} |
||||
|
||||
/** |
||||
* Perform the actual DM Soundex algorithm on the input string. |
||||
* |
||||
* @param source |
||||
* A String object to encode |
||||
* @param branching |
||||
* If branching shall be performed |
||||
* @return A string array containing all DM Soundex codes corresponding to the String supplied depending on the |
||||
* selected branching mode |
||||
*/ |
||||
private String[] soundex(final String source, final boolean branching) { |
||||
if (source == null) { |
||||
return null; |
||||
} |
||||
|
||||
final String input = cleanup(source); |
||||
|
||||
final Set<Branch> currentBranches = new LinkedHashSet<Branch>(); |
||||
currentBranches.add(new Branch()); |
||||
|
||||
char lastChar = '\0'; |
||||
for (int index = 0; index < input.length(); index++) { |
||||
final char ch = input.charAt(index); |
||||
|
||||
// ignore whitespace inside a name
|
||||
if (Character.isWhitespace(ch)) { |
||||
continue; |
||||
} |
||||
|
||||
final String inputContext = input.substring(index); |
||||
final List<Rule> rules = RULES.get(ch); |
||||
if (rules == null) { |
||||
continue; |
||||
} |
||||
|
||||
// use an EMPTY_LIST to avoid false positive warnings wrt potential null pointer access
|
||||
@SuppressWarnings("unchecked") |
||||
final List<Branch> nextBranches = branching ? new ArrayList<Branch>() : Collections.EMPTY_LIST; |
||||
|
||||
for (final Rule rule : rules) { |
||||
if (rule.matches(inputContext)) { |
||||
if (branching) { |
||||
nextBranches.clear(); |
||||
} |
||||
final String[] replacements = rule.getReplacements(inputContext, lastChar == '\0'); |
||||
final boolean branchingRequired = replacements.length > 1 && branching; |
||||
|
||||
for (final Branch branch : currentBranches) { |
||||
for (final String nextReplacement : replacements) { |
||||
// if we have multiple replacements, always create a new branch
|
||||
final Branch nextBranch = branchingRequired ? branch.createBranch() : branch; |
||||
|
||||
// special rule: occurrences of mn or nm are treated differently
|
||||
final boolean force = (lastChar == 'm' && ch == 'n') || (lastChar == 'n' && ch == 'm'); |
||||
|
||||
nextBranch.processNextReplacement(nextReplacement, force); |
||||
|
||||
if (branching) { |
||||
nextBranches.add(nextBranch); |
||||
} else { |
||||
break; |
||||
} |
||||
} |
||||
} |
||||
|
||||
if (branching) { |
||||
currentBranches.clear(); |
||||
currentBranches.addAll(nextBranches); |
||||
} |
||||
index += rule.getPatternLength() - 1; |
||||
break; |
||||
} |
||||
} |
||||
|
||||
lastChar = ch; |
||||
} |
||||
|
||||
final String[] result = new String[currentBranches.size()]; |
||||
int index = 0; |
||||
for (final Branch branch : currentBranches) { |
||||
branch.finish(); |
||||
result[index++] = branch.toString(); |
||||
} |
||||
|
||||
return result; |
||||
} |
||||
} |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,423 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
package com.fr.third.org.apache.commons.codec.language; |
||||
|
||||
import java.util.Locale; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.EncoderException; |
||||
import com.fr.third.org.apache.commons.codec.StringEncoder; |
||||
|
||||
/** |
||||
* Match Rating Approach Phonetic Algorithm Developed by <CITE>Western Airlines</CITE> in 1977. |
||||
* |
||||
* This class is immutable and thread-safe. |
||||
* |
||||
* @see <a href="http://en.wikipedia.org/wiki/Match_rating_approach">Wikipedia - Match Rating Approach</a> |
||||
* @since 1.8 |
||||
*/ |
||||
public class MatchRatingApproachEncoder implements StringEncoder { |
||||
|
||||
private static final String SPACE = " "; |
||||
|
||||
private static final String EMPTY = ""; |
||||
|
||||
/** |
||||
* Constants used mainly for the min rating value. |
||||
*/ |
||||
private static final int ONE = 1, TWO = 2, THREE = 3, FOUR = 4, FIVE = 5, SIX = 6, SEVEN = 7, |
||||
ELEVEN = 11, TWELVE = 12; |
||||
|
||||
/** |
||||
* The plain letter equivalent of the accented letters. |
||||
*/ |
||||
private static final String PLAIN_ASCII = "AaEeIiOoUu" + // grave
|
||||
"AaEeIiOoUuYy" + // acute
|
||||
"AaEeIiOoUuYy" + // circumflex
|
||||
"AaOoNn" + // tilde
|
||||
"AaEeIiOoUuYy" + // umlaut
|
||||
"Aa" + // ring
|
||||
"Cc" + // cedilla
|
||||
"OoUu"; // double acute
|
||||
|
||||
/** |
||||
* Unicode characters corresponding to various accented letters. For example: \u00DA is U acute etc... |
||||
*/ |
||||
private static final String UNICODE = "\u00C0\u00E0\u00C8\u00E8\u00CC\u00EC\u00D2\u00F2\u00D9\u00F9" + |
||||
"\u00C1\u00E1\u00C9\u00E9\u00CD\u00ED\u00D3\u00F3\u00DA\u00FA\u00DD\u00FD" + |
||||
"\u00C2\u00E2\u00CA\u00EA\u00CE\u00EE\u00D4\u00F4\u00DB\u00FB\u0176\u0177" + |
||||
"\u00C3\u00E3\u00D5\u00F5\u00D1\u00F1" + |
||||
"\u00C4\u00E4\u00CB\u00EB\u00CF\u00EF\u00D6\u00F6\u00DC\u00FC\u0178\u00FF" + |
||||
"\u00C5\u00E5" + "\u00C7\u00E7" + "\u0150\u0151\u0170\u0171"; |
||||
|
||||
private static final String[] DOUBLE_CONSONANT = |
||||
new String[] { "BB", "CC", "DD", "FF", "GG", "HH", "JJ", "KK", "LL", "MM", "NN", "PP", "QQ", "RR", "SS", |
||||
"TT", "VV", "WW", "XX", "YY", "ZZ" }; |
||||
|
||||
/** |
||||
* Cleans up a name: 1. Upper-cases everything 2. Removes some common punctuation 3. Removes accents 4. Removes any |
||||
* spaces. |
||||
* |
||||
* <h2>API Usage</h2> |
||||
* <p> |
||||
* Consider this method private, it is package protected for unit testing only. |
||||
* </p> |
||||
* |
||||
* @param name |
||||
* The name to be cleaned |
||||
* @return The cleaned name |
||||
*/ |
||||
String cleanName(final String name) { |
||||
String upperName = name.toUpperCase(Locale.ENGLISH); |
||||
|
||||
final String[] charsToTrim = { "\\-", "[&]", "\\'", "\\.", "[\\,]" }; |
||||
for (final String str : charsToTrim) { |
||||
upperName = upperName.replaceAll(str, EMPTY); |
||||
} |
||||
|
||||
upperName = removeAccents(upperName); |
||||
upperName = upperName.replaceAll("\\s+", EMPTY); |
||||
|
||||
return upperName; |
||||
} |
||||
|
||||
/** |
||||
* Encodes an Object using the Match Rating Approach algorithm. Method is here to satisfy the requirements of the |
||||
* Encoder interface Throws an EncoderException if input object is not of type java.lang.String. |
||||
* |
||||
* @param pObject |
||||
* Object to encode |
||||
* @return An object (of type java.lang.String) containing the Match Rating Approach code which corresponds to the |
||||
* String supplied. |
||||
* @throws EncoderException |
||||
* if the parameter supplied is not of type java.lang.String |
||||
*/ |
||||
@Override |
||||
public final Object encode(final Object pObject) throws EncoderException { |
||||
if (!(pObject instanceof String)) { |
||||
throw new EncoderException( |
||||
"Parameter supplied to Match Rating Approach encoder is not of type java.lang.String"); |
||||
} |
||||
return encode((String) pObject); |
||||
} |
||||
|
||||
/** |
||||
* Encodes a String using the Match Rating Approach (MRA) algorithm. |
||||
* |
||||
* @param name |
||||
* String object to encode |
||||
* @return The MRA code corresponding to the String supplied |
||||
*/ |
||||
@Override |
||||
public final String encode(String name) { |
||||
// Bulletproof for trivial input - NINO
|
||||
if (name == null || EMPTY.equalsIgnoreCase(name) || SPACE.equalsIgnoreCase(name) || name.length() == 1) { |
||||
return EMPTY; |
||||
} |
||||
|
||||
// Preprocessing
|
||||
name = cleanName(name); |
||||
|
||||
// BEGIN: Actual encoding part of the algorithm...
|
||||
// 1. Delete all vowels unless the vowel begins the word
|
||||
name = removeVowels(name); |
||||
|
||||
// 2. Remove second consonant from any double consonant
|
||||
name = removeDoubleConsonants(name); |
||||
|
||||
// 3. Reduce codex to 6 letters by joining the first 3 and last 3 letters
|
||||
name = getFirst3Last3(name); |
||||
|
||||
return name; |
||||
} |
||||
|
||||
/** |
||||
* Gets the first and last 3 letters of a name (if > 6 characters) Else just returns the name. |
||||
* |
||||
* <h2>API Usage</h2> |
||||
* <p> |
||||
* Consider this method private, it is package protected for unit testing only. |
||||
* </p> |
||||
* |
||||
* @param name |
||||
* The string to get the substrings from |
||||
* @return Annexed first and last 3 letters of input word. |
||||
*/ |
||||
String getFirst3Last3(final String name) { |
||||
final int nameLength = name.length(); |
||||
|
||||
if (nameLength > SIX) { |
||||
final String firstThree = name.substring(0, THREE); |
||||
final String lastThree = name.substring(nameLength - THREE, nameLength); |
||||
return firstThree + lastThree; |
||||
} |
||||
return name; |
||||
} |
||||
|
||||
/** |
||||
* Obtains the min rating of the length sum of the 2 names. In essence the larger the sum length the smaller the |
||||
* min rating. Values strictly from documentation. |
||||
* |
||||
* <h2>API Usage</h2> |
||||
* <p> |
||||
* Consider this method private, it is package protected for unit testing only. |
||||
* </p> |
||||
* |
||||
* @param sumLength |
||||
* The length of 2 strings sent down |
||||
* @return The min rating value |
||||
*/ |
||||
int getMinRating(final int sumLength) { |
||||
int minRating = 0; |
||||
|
||||
if (sumLength <= FOUR) { |
||||
minRating = FIVE; |
||||
} else if (sumLength <= SEVEN) { // aready know it is at least 5
|
||||
minRating = FOUR; |
||||
} else if (sumLength <= ELEVEN) { // aready know it is at least 8
|
||||
minRating = THREE; |
||||
} else if (sumLength == TWELVE) { |
||||
minRating = TWO; |
||||
} else { |
||||
minRating = ONE; // docs said little here.
|
||||
} |
||||
|
||||
return minRating; |
||||
} |
||||
|
||||
/** |
||||
* Determines if two names are homophonous via Match Rating Approach (MRA) algorithm. It should be noted that the |
||||
* strings are cleaned in the same way as {@link #encode(String)}. |
||||
* |
||||
* @param name1 |
||||
* First of the 2 strings (names) to compare |
||||
* @param name2 |
||||
* Second of the 2 names to compare |
||||
* @return <code>true</code> if the encodings are identical <code>false</code> otherwise. |
||||
*/ |
||||
public boolean isEncodeEquals(String name1, String name2) { |
||||
// Bulletproof for trivial input - NINO
|
||||
if (name1 == null || EMPTY.equalsIgnoreCase(name1) || SPACE.equalsIgnoreCase(name1)) { |
||||
return false; |
||||
} else if (name2 == null || EMPTY.equalsIgnoreCase(name2) || SPACE.equalsIgnoreCase(name2)) { |
||||
return false; |
||||
} else if (name1.length() == 1 || name2.length() == 1) { |
||||
return false; |
||||
} else if (name1.equalsIgnoreCase(name2)) { |
||||
return true; |
||||
} |
||||
|
||||
// Preprocessing
|
||||
name1 = cleanName(name1); |
||||
name2 = cleanName(name2); |
||||
|
||||
// Actual MRA Algorithm
|
||||
|
||||
// 1. Remove vowels
|
||||
name1 = removeVowels(name1); |
||||
name2 = removeVowels(name2); |
||||
|
||||
// 2. Remove double consonants
|
||||
name1 = removeDoubleConsonants(name1); |
||||
name2 = removeDoubleConsonants(name2); |
||||
|
||||
// 3. Reduce down to 3 letters
|
||||
name1 = getFirst3Last3(name1); |
||||
name2 = getFirst3Last3(name2); |
||||
|
||||
// 4. Check for length difference - if 3 or greater then no similarity
|
||||
// comparison is done
|
||||
if (Math.abs(name1.length() - name2.length()) >= THREE) { |
||||
return false; |
||||
} |
||||
|
||||
// 5. Obtain the minimum rating value by calculating the length sum of the
|
||||
// encoded Strings and sending it down.
|
||||
final int sumLength = Math.abs(name1.length() + name2.length()); |
||||
int minRating = 0; |
||||
minRating = getMinRating(sumLength); |
||||
|
||||
// 6. Process the encoded Strings from left to right and remove any
|
||||
// identical characters found from both Strings respectively.
|
||||
final int count = leftToRightThenRightToLeftProcessing(name1, name2); |
||||
|
||||
// 7. Each PNI item that has a similarity rating equal to or greater than
|
||||
// the min is considered to be a good candidate match
|
||||
return count >= minRating; |
||||
|
||||
} |
||||
|
||||
/** |
||||
* Determines if a letter is a vowel. |
||||
* |
||||
* <h2>API Usage</h2> |
||||
* <p> |
||||
* Consider this method private, it is package protected for unit testing only. |
||||
* </p> |
||||
* |
||||
* @param letter |
||||
* The letter under investigation |
||||
* @return True if a vowel, else false |
||||
*/ |
||||
boolean isVowel(final String letter) { |
||||
return letter.equalsIgnoreCase("E") || letter.equalsIgnoreCase("A") || letter.equalsIgnoreCase("O") || |
||||
letter.equalsIgnoreCase("I") || letter.equalsIgnoreCase("U"); |
||||
} |
||||
|
||||
/** |
||||
* Processes the names from left to right (first) then right to left removing identical letters in same positions. |
||||
* Then subtracts the longer string that remains from 6 and returns this. |
||||
* |
||||
* <h2>API Usage</h2> |
||||
* <p> |
||||
* Consider this method private, it is package protected for unit testing only. |
||||
* </p> |
||||
* |
||||
* @param name1 |
||||
* the first name to process |
||||
* @param name2 |
||||
* the second name to process |
||||
* @return the length as above |
||||
*/ |
||||
int leftToRightThenRightToLeftProcessing(final String name1, final String name2) { |
||||
final char[] name1Char = name1.toCharArray(); |
||||
final char[] name2Char = name2.toCharArray(); |
||||
|
||||
final int name1Size = name1.length() - 1; |
||||
final int name2Size = name2.length() - 1; |
||||
|
||||
String name1LtRStart = EMPTY; |
||||
String name1LtREnd = EMPTY; |
||||
|
||||
String name2RtLStart = EMPTY; |
||||
String name2RtLEnd = EMPTY; |
||||
|
||||
for (int i = 0; i < name1Char.length; i++) { |
||||
if (i > name2Size) { |
||||
break; |
||||
} |
||||
|
||||
name1LtRStart = name1.substring(i, i + 1); |
||||
name1LtREnd = name1.substring(name1Size - i, name1Size - i + 1); |
||||
|
||||
name2RtLStart = name2.substring(i, i + 1); |
||||
name2RtLEnd = name2.substring(name2Size - i, name2Size - i + 1); |
||||
|
||||
// Left to right...
|
||||
if (name1LtRStart.equals(name2RtLStart)) { |
||||
name1Char[i] = ' '; |
||||
name2Char[i] = ' '; |
||||
} |
||||
|
||||
// Right to left...
|
||||
if (name1LtREnd.equals(name2RtLEnd)) { |
||||
name1Char[name1Size - i] = ' '; |
||||
name2Char[name2Size - i] = ' '; |
||||
} |
||||
} |
||||
|
||||
// Char arrays -> string & remove extraneous space
|
||||
final String strA = new String(name1Char).replaceAll("\\s+", EMPTY); |
||||
final String strB = new String(name2Char).replaceAll("\\s+", EMPTY); |
||||
|
||||
// Final bit - subtract longest string from 6 and return this int value
|
||||
if (strA.length() > strB.length()) { |
||||
return Math.abs(SIX - strA.length()); |
||||
} |
||||
return Math.abs(SIX - strB.length()); |
||||
} |
||||
|
||||
/** |
||||
* Removes accented letters and replaces with non-accented ascii equivalent Case is preserved. |
||||
* http://www.codecodex.com/wiki/Remove_accent_from_letters_%28ex_.%C3%A9_to_e%29
|
||||
* |
||||
* @param accentedWord |
||||
* The word that may have accents in it. |
||||
* @return De-accented word |
||||
*/ |
||||
String removeAccents(final String accentedWord) { |
||||
if (accentedWord == null) { |
||||
return null; |
||||
} |
||||
|
||||
final StringBuilder sb = new StringBuilder(); |
||||
final int n = accentedWord.length(); |
||||
|
||||
for (int i = 0; i < n; i++) { |
||||
final char c = accentedWord.charAt(i); |
||||
final int pos = UNICODE.indexOf(c); |
||||
if (pos > -1) { |
||||
sb.append(PLAIN_ASCII.charAt(pos)); |
||||
} else { |
||||
sb.append(c); |
||||
} |
||||
} |
||||
|
||||
return sb.toString(); |
||||
} |
||||
|
||||
/** |
||||
* Replaces any double consonant pair with the single letter equivalent. |
||||
* |
||||
* <h2>API Usage</h2> |
||||
* <p> |
||||
* Consider this method private, it is package protected for unit testing only. |
||||
* </p> |
||||
* |
||||
* @param name |
||||
* String to have double consonants removed |
||||
* @return Single consonant word |
||||
*/ |
||||
String removeDoubleConsonants(final String name) { |
||||
String replacedName = name.toUpperCase(Locale.ENGLISH); |
||||
for (final String dc : DOUBLE_CONSONANT) { |
||||
if (replacedName.contains(dc)) { |
||||
final String singleLetter = dc.substring(0, 1); |
||||
replacedName = replacedName.replace(dc, singleLetter); |
||||
} |
||||
} |
||||
return replacedName; |
||||
} |
||||
|
||||
/** |
||||
* Deletes all vowels unless the vowel begins the word. |
||||
* |
||||
* <h2>API Usage</h2> |
||||
* <p> |
||||
* Consider this method private, it is package protected for unit testing only. |
||||
* </p> |
||||
* |
||||
* @param name |
||||
* The name to have vowels removed |
||||
* @return De-voweled word |
||||
*/ |
||||
String removeVowels(String name) { |
||||
// Extract first letter
|
||||
final String firstLetter = name.substring(0, 1); |
||||
|
||||
name = name.replaceAll("A", EMPTY); |
||||
name = name.replaceAll("E", EMPTY); |
||||
name = name.replaceAll("I", EMPTY); |
||||
name = name.replaceAll("O", EMPTY); |
||||
name = name.replaceAll("U", EMPTY); |
||||
|
||||
name = name.replaceAll("\\s{2,}\\b", SPACE); |
||||
|
||||
// return isVowel(firstLetter) ? (firstLetter + name) : name;
|
||||
if (isVowel(firstLetter)) { |
||||
return firstLetter + name; |
||||
} |
||||
return name; |
||||
} |
||||
} |
@ -0,0 +1,430 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.language; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.EncoderException; |
||||
import com.fr.third.org.apache.commons.codec.StringEncoder; |
||||
|
||||
/** |
||||
* Encodes a string into a Metaphone value. |
||||
* <p> |
||||
* Initial Java implementation by <CITE>William B. Brogden. December, 1997</CITE>. |
||||
* Permission given by <CITE>wbrogden</CITE> for code to be used anywhere. |
||||
* <p> |
||||
* <CITE>Hanging on the Metaphone</CITE> by <CITE>Lawrence Philips</CITE> in <CITE>Computer Language of Dec. 1990, |
||||
* p 39.</CITE> |
||||
* <p> |
||||
* Note, that this does not match the algorithm that ships with PHP, or the algorithm found in the Perl implementations: |
||||
* </p> |
||||
* <ul> |
||||
* <li><a href="http://search.cpan.org/~mschwern/Text-Metaphone-1.96/Metaphone.pm">Text:Metaphone-1.96</a> |
||||
* (broken link 4/30/2013) </li> |
||||
* <li><a href="https://metacpan.org/source/MSCHWERN/Text-Metaphone-1.96//Metaphone.pm">Text:Metaphone-1.96</a> |
||||
* (link checked 4/30/2013) </li> |
||||
* </ul> |
||||
* <p> |
||||
* They have had undocumented changes from the originally published algorithm. |
||||
* For more information, see <a href="https://issues.apache.org/jira/browse/CODEC-57">CODEC-57</a>. |
||||
* <p> |
||||
* This class is conditionally thread-safe. |
||||
* The instance field {@link #maxCodeLen} is mutable {@link #setMaxCodeLen(int)} |
||||
* but is not volatile, and accesses are not synchronized. |
||||
* If an instance of the class is shared between threads, the caller needs to ensure that suitable synchronization |
||||
* is used to ensure safe publication of the value between threads, and must not invoke {@link #setMaxCodeLen(int)} |
||||
* after initial setup. |
||||
* |
||||
* @version $Id: Metaphone.java 1619948 2014-08-22 22:53:55Z ggregory $ |
||||
*/ |
||||
public class Metaphone implements StringEncoder { |
||||
|
||||
/** |
||||
* Five values in the English language |
||||
*/ |
||||
private static final String VOWELS = "AEIOU"; |
||||
|
||||
/** |
||||
* Variable used in Metaphone algorithm |
||||
*/ |
||||
private static final String FRONTV = "EIY"; |
||||
|
||||
/** |
||||
* Variable used in Metaphone algorithm |
||||
*/ |
||||
private static final String VARSON = "CSPTG"; |
||||
|
||||
/** |
||||
* The max code length for metaphone is 4 |
||||
*/ |
||||
private int maxCodeLen = 4; |
||||
|
||||
/** |
||||
* Creates an instance of the Metaphone encoder |
||||
*/ |
||||
public Metaphone() { |
||||
super(); |
||||
} |
||||
|
||||
/** |
||||
* Find the metaphone value of a String. This is similar to the |
||||
* soundex algorithm, but better at finding similar sounding words. |
||||
* All input is converted to upper case. |
||||
* Limitations: Input format is expected to be a single ASCII word |
||||
* with only characters in the A - Z range, no punctuation or numbers. |
||||
* |
||||
* @param txt String to find the metaphone code for |
||||
* @return A metaphone code corresponding to the String supplied |
||||
*/ |
||||
public String metaphone(final String txt) { |
||||
boolean hard = false; |
||||
int txtLength; |
||||
if (txt == null || (txtLength = txt.length()) == 0) { |
||||
return ""; |
||||
} |
||||
// single character is itself
|
||||
if (txtLength == 1) { |
||||
return txt.toUpperCase(java.util.Locale.ENGLISH); |
||||
} |
||||
|
||||
final char[] inwd = txt.toUpperCase(java.util.Locale.ENGLISH).toCharArray(); |
||||
|
||||
final StringBuilder local = new StringBuilder(40); // manipulate
|
||||
final StringBuilder code = new StringBuilder(10); // output
|
||||
// handle initial 2 characters exceptions
|
||||
switch(inwd[0]) { |
||||
case 'K': |
||||
case 'G': |
||||
case 'P': /* looking for KN, etc*/ |
||||
if (inwd[1] == 'N') { |
||||
local.append(inwd, 1, inwd.length - 1); |
||||
} else { |
||||
local.append(inwd); |
||||
} |
||||
break; |
||||
case 'A': /* looking for AE */ |
||||
if (inwd[1] == 'E') { |
||||
local.append(inwd, 1, inwd.length - 1); |
||||
} else { |
||||
local.append(inwd); |
||||
} |
||||
break; |
||||
case 'W': /* looking for WR or WH */ |
||||
if (inwd[1] == 'R') { // WR -> R
|
||||
local.append(inwd, 1, inwd.length - 1); |
||||
break; |
||||
} |
||||
if (inwd[1] == 'H') { |
||||
local.append(inwd, 1, inwd.length - 1); |
||||
local.setCharAt(0, 'W'); // WH -> W
|
||||
} else { |
||||
local.append(inwd); |
||||
} |
||||
break; |
||||
case 'X': /* initial X becomes S */ |
||||
inwd[0] = 'S'; |
||||
local.append(inwd); |
||||
break; |
||||
default: |
||||
local.append(inwd); |
||||
} // now local has working string with initials fixed
|
||||
|
||||
final int wdsz = local.length(); |
||||
int n = 0; |
||||
|
||||
while (code.length() < this.getMaxCodeLen() && |
||||
n < wdsz ) { // max code size of 4 works well
|
||||
final char symb = local.charAt(n); |
||||
// remove duplicate letters except C
|
||||
if (symb != 'C' && isPreviousChar( local, n, symb ) ) { |
||||
n++; |
||||
} else { // not dup
|
||||
switch(symb) { |
||||
case 'A': |
||||
case 'E': |
||||
case 'I': |
||||
case 'O': |
||||
case 'U': |
||||
if (n == 0) { |
||||
code.append(symb); |
||||
} |
||||
break; // only use vowel if leading char
|
||||
case 'B': |
||||
if ( isPreviousChar(local, n, 'M') && |
||||
isLastChar(wdsz, n) ) { // B is silent if word ends in MB
|
||||
break; |
||||
} |
||||
code.append(symb); |
||||
break; |
||||
case 'C': // lots of C special cases
|
||||
/* discard if SCI, SCE or SCY */ |
||||
if ( isPreviousChar(local, n, 'S') && |
||||
!isLastChar(wdsz, n) && |
||||
FRONTV.indexOf(local.charAt(n + 1)) >= 0 ) { |
||||
break; |
||||
} |
||||
if (regionMatch(local, n, "CIA")) { // "CIA" -> X
|
||||
code.append('X'); |
||||
break; |
||||
} |
||||
if (!isLastChar(wdsz, n) && |
||||
FRONTV.indexOf(local.charAt(n + 1)) >= 0) { |
||||
code.append('S'); |
||||
break; // CI,CE,CY -> S
|
||||
} |
||||
if (isPreviousChar(local, n, 'S') && |
||||
isNextChar(local, n, 'H') ) { // SCH->sk
|
||||
code.append('K'); |
||||
break; |
||||
} |
||||
if (isNextChar(local, n, 'H')) { // detect CH
|
||||
if (n == 0 && |
||||
wdsz >= 3 && |
||||
isVowel(local,2) ) { // CH consonant -> K consonant
|
||||
code.append('K'); |
||||
} else { |
||||
code.append('X'); // CHvowel -> X
|
||||
} |
||||
} else { |
||||
code.append('K'); |
||||
} |
||||
break; |
||||
case 'D': |
||||
if (!isLastChar(wdsz, n + 1) && |
||||
isNextChar(local, n, 'G') && |
||||
FRONTV.indexOf(local.charAt(n + 2)) >= 0) { // DGE DGI DGY -> J
|
||||
code.append('J'); n += 2; |
||||
} else { |
||||
code.append('T'); |
||||
} |
||||
break; |
||||
case 'G': // GH silent at end or before consonant
|
||||
if (isLastChar(wdsz, n + 1) && |
||||
isNextChar(local, n, 'H')) { |
||||
break; |
||||
} |
||||
if (!isLastChar(wdsz, n + 1) && |
||||
isNextChar(local,n,'H') && |
||||
!isVowel(local,n+2)) { |
||||
break; |
||||
} |
||||
if (n > 0 && |
||||
( regionMatch(local, n, "GN") || |
||||
regionMatch(local, n, "GNED") ) ) { |
||||
break; // silent G
|
||||
} |
||||
if (isPreviousChar(local, n, 'G')) { |
||||
// NOTE: Given that duplicated chars are removed, I don't see how this can ever be true
|
||||
hard = true; |
||||
} else { |
||||
hard = false; |
||||
} |
||||
if (!isLastChar(wdsz, n) && |
||||
FRONTV.indexOf(local.charAt(n + 1)) >= 0 && |
||||
!hard) { |
||||
code.append('J'); |
||||
} else { |
||||
code.append('K'); |
||||
} |
||||
break; |
||||
case 'H': |
||||
if (isLastChar(wdsz, n)) { |
||||
break; // terminal H
|
||||
} |
||||
if (n > 0 && |
||||
VARSON.indexOf(local.charAt(n - 1)) >= 0) { |
||||
break; |
||||
} |
||||
if (isVowel(local,n+1)) { |
||||
code.append('H'); // Hvowel
|
||||
} |
||||
break; |
||||
case 'F': |
||||
case 'J': |
||||
case 'L': |
||||
case 'M': |
||||
case 'N': |
||||
case 'R': |
||||
code.append(symb); |
||||
break; |
||||
case 'K': |
||||
if (n > 0) { // not initial
|
||||
if (!isPreviousChar(local, n, 'C')) { |
||||
code.append(symb); |
||||
} |
||||
} else { |
||||
code.append(symb); // initial K
|
||||
} |
||||
break; |
||||
case 'P': |
||||
if (isNextChar(local,n,'H')) { |
||||
// PH -> F
|
||||
code.append('F'); |
||||
} else { |
||||
code.append(symb); |
||||
} |
||||
break; |
||||
case 'Q': |
||||
code.append('K'); |
||||
break; |
||||
case 'S': |
||||
if (regionMatch(local,n,"SH") || |
||||
regionMatch(local,n,"SIO") || |
||||
regionMatch(local,n,"SIA")) { |
||||
code.append('X'); |
||||
} else { |
||||
code.append('S'); |
||||
} |
||||
break; |
||||
case 'T': |
||||
if (regionMatch(local,n,"TIA") || |
||||
regionMatch(local,n,"TIO")) { |
||||
code.append('X'); |
||||
break; |
||||
} |
||||
if (regionMatch(local,n,"TCH")) { |
||||
// Silent if in "TCH"
|
||||
break; |
||||
} |
||||
// substitute numeral 0 for TH (resembles theta after all)
|
||||
if (regionMatch(local,n,"TH")) { |
||||
code.append('0'); |
||||
} else { |
||||
code.append('T'); |
||||
} |
||||
break; |
||||
case 'V': |
||||
code.append('F'); break; |
||||
case 'W': |
||||
case 'Y': // silent if not followed by vowel
|
||||
if (!isLastChar(wdsz,n) && |
||||
isVowel(local,n+1)) { |
||||
code.append(symb); |
||||
} |
||||
break; |
||||
case 'X': |
||||
code.append('K'); |
||||
code.append('S'); |
||||
break; |
||||
case 'Z': |
||||
code.append('S'); |
||||
break; |
||||
default: |
||||
// do nothing
|
||||
break; |
||||
} // end switch
|
||||
n++; |
||||
} // end else from symb != 'C'
|
||||
if (code.length() > this.getMaxCodeLen()) { |
||||
code.setLength(this.getMaxCodeLen()); |
||||
} |
||||
} |
||||
return code.toString(); |
||||
} |
||||
|
||||
private boolean isVowel(final StringBuilder string, final int index) { |
||||
return VOWELS.indexOf(string.charAt(index)) >= 0; |
||||
} |
||||
|
||||
private boolean isPreviousChar(final StringBuilder string, final int index, final char c) { |
||||
boolean matches = false; |
||||
if( index > 0 && |
||||
index < string.length() ) { |
||||
matches = string.charAt(index - 1) == c; |
||||
} |
||||
return matches; |
||||
} |
||||
|
||||
private boolean isNextChar(final StringBuilder string, final int index, final char c) { |
||||
boolean matches = false; |
||||
if( index >= 0 && |
||||
index < string.length() - 1 ) { |
||||
matches = string.charAt(index + 1) == c; |
||||
} |
||||
return matches; |
||||
} |
||||
|
||||
private boolean regionMatch(final StringBuilder string, final int index, final String test) { |
||||
boolean matches = false; |
||||
if( index >= 0 && |
||||
index + test.length() - 1 < string.length() ) { |
||||
final String substring = string.substring( index, index + test.length()); |
||||
matches = substring.equals( test ); |
||||
} |
||||
return matches; |
||||
} |
||||
|
||||
private boolean isLastChar(final int wdsz, final int n) { |
||||
return n + 1 == wdsz; |
||||
} |
||||
|
||||
|
||||
/** |
||||
* Encodes an Object using the metaphone algorithm. This method |
||||
* is provided in order to satisfy the requirements of the |
||||
* Encoder interface, and will throw an EncoderException if the |
||||
* supplied object is not of type java.lang.String. |
||||
* |
||||
* @param obj Object to encode |
||||
* @return An object (or type java.lang.String) containing the |
||||
* metaphone code which corresponds to the String supplied. |
||||
* @throws EncoderException if the parameter supplied is not |
||||
* of type java.lang.String |
||||
*/ |
||||
@Override |
||||
public Object encode(final Object obj) throws EncoderException { |
||||
if (!(obj instanceof String)) { |
||||
throw new EncoderException("Parameter supplied to Metaphone encode is not of type java.lang.String"); |
||||
} |
||||
return metaphone((String) obj); |
||||
} |
||||
|
||||
/** |
||||
* Encodes a String using the Metaphone algorithm. |
||||
* |
||||
* @param str String object to encode |
||||
* @return The metaphone code corresponding to the String supplied |
||||
*/ |
||||
@Override |
||||
public String encode(final String str) { |
||||
return metaphone(str); |
||||
} |
||||
|
||||
/** |
||||
* Tests is the metaphones of two strings are identical. |
||||
* |
||||
* @param str1 First of two strings to compare |
||||
* @param str2 Second of two strings to compare |
||||
* @return <code>true</code> if the metaphones of these strings are identical, |
||||
* <code>false</code> otherwise. |
||||
*/ |
||||
public boolean isMetaphoneEqual(final String str1, final String str2) { |
||||
return metaphone(str1).equals(metaphone(str2)); |
||||
} |
||||
|
||||
/** |
||||
* Returns the maxCodeLen. |
||||
* @return int |
||||
*/ |
||||
public int getMaxCodeLen() { return this.maxCodeLen; } |
||||
|
||||
/** |
||||
* Sets the maxCodeLen. |
||||
* @param maxCodeLen The maxCodeLen to set |
||||
*/ |
||||
public void setMaxCodeLen(final int maxCodeLen) { this.maxCodeLen = maxCodeLen; } |
||||
|
||||
} |
@ -0,0 +1,318 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.language; |
||||
|
||||
import java.util.regex.Pattern; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.EncoderException; |
||||
import com.fr.third.org.apache.commons.codec.StringEncoder; |
||||
|
||||
/** |
||||
* Encodes a string into a NYSIIS value. NYSIIS is an encoding used to relate similar names, but can also be used as a |
||||
* general purpose scheme to find word with similar phonemes. |
||||
* <p> |
||||
* NYSIIS features an accuracy increase of 2.7% over the traditional Soundex algorithm. |
||||
* <p> |
||||
* Algorithm description: |
||||
* <pre> |
||||
* 1. Transcode first characters of name |
||||
* 1a. MAC -> MCC |
||||
* 1b. KN -> NN |
||||
* 1c. K -> C |
||||
* 1d. PH -> FF |
||||
* 1e. PF -> FF |
||||
* 1f. SCH -> SSS |
||||
* 2. Transcode last characters of name |
||||
* 2a. EE, IE -> Y |
||||
* 2b. DT,RT,RD,NT,ND -> D |
||||
* 3. First character of key = first character of name |
||||
* 4. Transcode remaining characters by following these rules, incrementing by one character each time |
||||
* 4a. EV -> AF else A,E,I,O,U -> A |
||||
* 4b. Q -> G |
||||
* 4c. Z -> S |
||||
* 4d. M -> N |
||||
* 4e. KN -> N else K -> C |
||||
* 4f. SCH -> SSS |
||||
* 4g. PH -> FF |
||||
* 4h. H -> If previous or next is nonvowel, previous |
||||
* 4i. W -> If previous is vowel, previous |
||||
* 4j. Add current to key if current != last key character |
||||
* 5. If last character is S, remove it |
||||
* 6. If last characters are AY, replace with Y |
||||
* 7. If last character is A, remove it |
||||
* 8. Collapse all strings of repeated characters |
||||
* 9. Add original first character of name as first character of key |
||||
* </pre> |
||||
* <p> |
||||
* This class is immutable and thread-safe. |
||||
* |
||||
* @see <a href="http://en.wikipedia.org/wiki/NYSIIS">NYSIIS on Wikipedia</a> |
||||
* @see <a href="http://www.dropby.com/NYSIIS.html">NYSIIS on dropby.com</a> |
||||
* @see Soundex |
||||
* @since 1.7 |
||||
* @version $Id: Nysiis.java 1725161 2016-01-18 01:08:56Z ggregory $ |
||||
*/ |
||||
public class Nysiis implements StringEncoder { |
||||
|
||||
private static final char[] CHARS_A = new char[] { 'A' }; |
||||
private static final char[] CHARS_AF = new char[] { 'A', 'F' }; |
||||
private static final char[] CHARS_C = new char[] { 'C' }; |
||||
private static final char[] CHARS_FF = new char[] { 'F', 'F' }; |
||||
private static final char[] CHARS_G = new char[] { 'G' }; |
||||
private static final char[] CHARS_N = new char[] { 'N' }; |
||||
private static final char[] CHARS_NN = new char[] { 'N', 'N' }; |
||||
private static final char[] CHARS_S = new char[] { 'S' }; |
||||
private static final char[] CHARS_SSS = new char[] { 'S', 'S', 'S' }; |
||||
|
||||
private static final Pattern PAT_MAC = Pattern.compile("^MAC"); |
||||
private static final Pattern PAT_KN = Pattern.compile("^KN"); |
||||
private static final Pattern PAT_K = Pattern.compile("^K"); |
||||
private static final Pattern PAT_PH_PF = Pattern.compile("^(PH|PF)"); |
||||
private static final Pattern PAT_SCH = Pattern.compile("^SCH"); |
||||
private static final Pattern PAT_EE_IE = Pattern.compile("(EE|IE)$"); |
||||
private static final Pattern PAT_DT_ETC = Pattern.compile("(DT|RT|RD|NT|ND)$"); |
||||
|
||||
private static final char SPACE = ' '; |
||||
private static final int TRUE_LENGTH = 6; |
||||
|
||||
/** |
||||
* Tests if the given character is a vowel. |
||||
* |
||||
* @param c |
||||
* the character to test |
||||
* @return <code>true</code> if the character is a vowel, <code>false</code> otherwise |
||||
*/ |
||||
private static boolean isVowel(final char c) { |
||||
return c == 'A' || c == 'E' || c == 'I' || c == 'O' || c == 'U'; |
||||
} |
||||
|
||||
/** |
||||
* Transcodes the remaining parts of the String. The method operates on a sliding window, looking at 4 characters at |
||||
* a time: [i-1, i, i+1, i+2]. |
||||
* |
||||
* @param prev |
||||
* the previous character |
||||
* @param curr |
||||
* the current character |
||||
* @param next |
||||
* the next character |
||||
* @param aNext |
||||
* the after next character |
||||
* @return a transcoded array of characters, starting from the current position |
||||
*/ |
||||
private static char[] transcodeRemaining(final char prev, final char curr, final char next, final char aNext) { |
||||
// 1. EV -> AF
|
||||
if (curr == 'E' && next == 'V') { |
||||
return CHARS_AF; |
||||
} |
||||
|
||||
// A, E, I, O, U -> A
|
||||
if (isVowel(curr)) { |
||||
return CHARS_A; |
||||
} |
||||
|
||||
// 2. Q -> G, Z -> S, M -> N
|
||||
if (curr == 'Q') { |
||||
return CHARS_G; |
||||
} else if (curr == 'Z') { |
||||
return CHARS_S; |
||||
} else if (curr == 'M') { |
||||
return CHARS_N; |
||||
} |
||||
|
||||
// 3. KN -> NN else K -> C
|
||||
if (curr == 'K') { |
||||
if (next == 'N') { |
||||
return CHARS_NN; |
||||
} |
||||
return CHARS_C; |
||||
} |
||||
|
||||
// 4. SCH -> SSS
|
||||
if (curr == 'S' && next == 'C' && aNext == 'H') { |
||||
return CHARS_SSS; |
||||
} |
||||
|
||||
// PH -> FF
|
||||
if (curr == 'P' && next == 'H') { |
||||
return CHARS_FF; |
||||
} |
||||
|
||||
// 5. H -> If previous or next is a non vowel, previous.
|
||||
if (curr == 'H' && (!isVowel(prev) || !isVowel(next))) { |
||||
return new char[] { prev }; |
||||
} |
||||
|
||||
// 6. W -> If previous is vowel, previous.
|
||||
if (curr == 'W' && isVowel(prev)) { |
||||
return new char[] { prev }; |
||||
} |
||||
|
||||
return new char[] { curr }; |
||||
} |
||||
|
||||
/** Indicates the strict mode. */ |
||||
private final boolean strict; |
||||
|
||||
/** |
||||
* Creates an instance of the {@link Nysiis} encoder with strict mode (original form), |
||||
* i.e. encoded strings have a maximum length of 6. |
||||
*/ |
||||
public Nysiis() { |
||||
this(true); |
||||
} |
||||
|
||||
/** |
||||
* Create an instance of the {@link Nysiis} encoder with the specified strict mode: |
||||
* |
||||
* <ul> |
||||
* <li><code>true</code>: encoded strings have a maximum length of 6</li> |
||||
* <li><code>false</code>: encoded strings may have arbitrary length</li> |
||||
* </ul> |
||||
* |
||||
* @param strict |
||||
* the strict mode |
||||
*/ |
||||
public Nysiis(final boolean strict) { |
||||
this.strict = strict; |
||||
} |
||||
|
||||
/** |
||||
* Encodes an Object using the NYSIIS algorithm. This method is provided in order to satisfy the requirements of the |
||||
* Encoder interface, and will throw an {@link EncoderException} if the supplied object is not of type |
||||
* {@link String}. |
||||
* |
||||
* @param obj |
||||
* Object to encode |
||||
* @return An object (or a {@link String}) containing the NYSIIS code which corresponds to the given String. |
||||
* @throws EncoderException |
||||
* if the parameter supplied is not of a {@link String} |
||||
* @throws IllegalArgumentException |
||||
* if a character is not mapped |
||||
*/ |
||||
@Override |
||||
public Object encode(final Object obj) throws EncoderException { |
||||
if (!(obj instanceof String)) { |
||||
throw new EncoderException("Parameter supplied to Nysiis encode is not of type java.lang.String"); |
||||
} |
||||
return this.nysiis((String) obj); |
||||
} |
||||
|
||||
/** |
||||
* Encodes a String using the NYSIIS algorithm. |
||||
* |
||||
* @param str |
||||
* A String object to encode |
||||
* @return A Nysiis code corresponding to the String supplied |
||||
* @throws IllegalArgumentException |
||||
* if a character is not mapped |
||||
*/ |
||||
@Override |
||||
public String encode(final String str) { |
||||
return this.nysiis(str); |
||||
} |
||||
|
||||
/** |
||||
* Indicates the strict mode for this {@link Nysiis} encoder. |
||||
* |
||||
* @return <code>true</code> if the encoder is configured for strict mode, <code>false</code> otherwise |
||||
*/ |
||||
public boolean isStrict() { |
||||
return this.strict; |
||||
} |
||||
|
||||
/** |
||||
* Retrieves the NYSIIS code for a given String object. |
||||
* |
||||
* @param str |
||||
* String to encode using the NYSIIS algorithm |
||||
* @return A NYSIIS code for the String supplied |
||||
*/ |
||||
public String nysiis(String str) { |
||||
if (str == null) { |
||||
return null; |
||||
} |
||||
|
||||
// Use the same clean rules as Soundex
|
||||
str = SoundexUtils.clean(str); |
||||
|
||||
if (str.length() == 0) { |
||||
return str; |
||||
} |
||||
|
||||
// Translate first characters of name:
|
||||
// MAC -> MCC, KN -> NN, K -> C, PH | PF -> FF, SCH -> SSS
|
||||
str = PAT_MAC.matcher(str).replaceFirst("MCC"); |
||||
str = PAT_KN.matcher(str).replaceFirst("NN"); |
||||
str = PAT_K.matcher(str).replaceFirst("C"); |
||||
str = PAT_PH_PF.matcher(str).replaceFirst("FF"); |
||||
str = PAT_SCH.matcher(str).replaceFirst("SSS"); |
||||
|
||||
// Translate last characters of name:
|
||||
// EE -> Y, IE -> Y, DT | RT | RD | NT | ND -> D
|
||||
str = PAT_EE_IE.matcher(str).replaceFirst("Y"); |
||||
str = PAT_DT_ETC.matcher(str).replaceFirst("D"); |
||||
|
||||
// First character of key = first character of name.
|
||||
final StringBuilder key = new StringBuilder(str.length()); |
||||
key.append(str.charAt(0)); |
||||
|
||||
// Transcode remaining characters, incrementing by one character each time
|
||||
final char[] chars = str.toCharArray(); |
||||
final int len = chars.length; |
||||
|
||||
for (int i = 1; i < len; i++) { |
||||
final char next = i < len - 1 ? chars[i + 1] : SPACE; |
||||
final char aNext = i < len - 2 ? chars[i + 2] : SPACE; |
||||
final char[] transcoded = transcodeRemaining(chars[i - 1], chars[i], next, aNext); |
||||
System.arraycopy(transcoded, 0, chars, i, transcoded.length); |
||||
|
||||
// only append the current char to the key if it is different from the last one
|
||||
if (chars[i] != chars[i - 1]) { |
||||
key.append(chars[i]); |
||||
} |
||||
} |
||||
|
||||
if (key.length() > 1) { |
||||
char lastChar = key.charAt(key.length() - 1); |
||||
|
||||
// If last character is S, remove it.
|
||||
if (lastChar == 'S') { |
||||
key.deleteCharAt(key.length() - 1); |
||||
lastChar = key.charAt(key.length() - 1); |
||||
} |
||||
|
||||
if (key.length() > 2) { |
||||
final char last2Char = key.charAt(key.length() - 2); |
||||
// If last characters are AY, replace with Y.
|
||||
if (last2Char == 'A' && lastChar == 'Y') { |
||||
key.deleteCharAt(key.length() - 2); |
||||
} |
||||
} |
||||
|
||||
// If last character is A, remove it.
|
||||
if (lastChar == 'A') { |
||||
key.deleteCharAt(key.length() - 1); |
||||
} |
||||
} |
||||
|
||||
final String string = key.toString(); |
||||
return this.isStrict() ? string.substring(0, Math.min(TRUE_LENGTH, string.length())) : string; |
||||
} |
||||
|
||||
} |
@ -0,0 +1,219 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.language; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.EncoderException; |
||||
import com.fr.third.org.apache.commons.codec.StringEncoder; |
||||
|
||||
/** |
||||
* Encodes a string into a Refined Soundex value. A refined soundex code is |
||||
* optimized for spell checking words. Soundex method originally developed by |
||||
* <CITE>Margaret Odell</CITE> and <CITE>Robert Russell</CITE>. |
||||
* |
||||
* <p>This class is immutable and thread-safe.</p> |
||||
* |
||||
* @version $Id: RefinedSoundex.java 1811347 2017-10-06 15:21:18Z ggregory $ |
||||
*/ |
||||
public class RefinedSoundex implements StringEncoder { |
||||
|
||||
/** |
||||
* Mapping: |
||||
* <pre> |
||||
* 0: A E I O U Y H W |
||||
* 1: B P |
||||
* 2: F V |
||||
* 3: C K S |
||||
* 4: G J |
||||
* 5: Q X Z |
||||
* 6: D T |
||||
* 7: L |
||||
* 8: M N |
||||
* 9: R |
||||
* </pre> |
||||
* @since 1.4 |
||||
*/ |
||||
// ABCDEFGHIJKLMNOPQRSTUVWXYZ
|
||||
public static final String US_ENGLISH_MAPPING_STRING = "01360240043788015936020505"; |
||||
|
||||
/** |
||||
* RefinedSoundex is *refined* for a number of reasons one being that the |
||||
* mappings have been altered. This implementation contains default |
||||
* mappings for US English. |
||||
*/ |
||||
private static final char[] US_ENGLISH_MAPPING = US_ENGLISH_MAPPING_STRING.toCharArray(); |
||||
|
||||
/** |
||||
* Every letter of the alphabet is "mapped" to a numerical value. This char |
||||
* array holds the values to which each letter is mapped. This |
||||
* implementation contains a default map for US_ENGLISH |
||||
*/ |
||||
private final char[] soundexMapping; |
||||
|
||||
/** |
||||
* This static variable contains an instance of the RefinedSoundex using |
||||
* the US_ENGLISH mapping. |
||||
*/ |
||||
public static final RefinedSoundex US_ENGLISH = new RefinedSoundex(); |
||||
|
||||
/**
 * Builds a RefinedSoundex encoder that works with the default US English
 * mapping.
 */
public RefinedSoundex() {
    soundexMapping = US_ENGLISH_MAPPING;
}
||||
|
||||
/**
 * Builds a refined soundex encoder that works with a caller-supplied mapping.
 * Use this constructor to customize the mapping, and/or possibly to provide
 * an internationalized mapping for a non-Western character set.
 * <p>
 * The array is copied, so changing it afterwards does not affect this encoder.
 * </p>
 *
 * @param mapping
 *                  Mapping array to use when finding the corresponding code for
 *                  a given character; must not be {@code null}
 */
public RefinedSoundex(final char[] mapping) {
    this.soundexMapping = mapping.clone();
}
||||
|
||||
/**
 * Builds a refined Soundex encoder whose mapping is given as a string. Use this constructor to customize the
 * mapping, and/or possibly to provide an internationalized mapping for a non-Western character set.
 *
 * @param mapping
 *            Mapping string to use when finding the corresponding code for a given character; must not be
 *            {@code null}
 * @since 1.4
 */
public RefinedSoundex(final String mapping) {
    soundexMapping = mapping.toCharArray();
}
||||
|
||||
/** |
||||
* Returns the number of characters in the two encoded Strings that are the |
||||
* same. This return value ranges from 0 to the length of the shortest |
||||
* encoded String: 0 indicates little or no similarity, and 4 out of 4 (for |
||||
* example) indicates strong similarity or identical values. For refined |
||||
* Soundex, the return value can be greater than 4. |
||||
* |
||||
* @param s1 |
||||
* A String that will be encoded and compared. |
||||
* @param s2 |
||||
* A String that will be encoded and compared. |
||||
* @return The number of characters in the two encoded Strings that are the |
||||
* same from 0 to to the length of the shortest encoded String. |
||||
* |
||||
* @see SoundexUtils#difference(StringEncoder,String,String) |
||||
* @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp"> |
||||
* MS T-SQL DIFFERENCE</a> |
||||
* |
||||
* @throws EncoderException |
||||
* if an error occurs encoding one of the strings |
||||
* @since 1.3 |
||||
*/ |
||||
public int difference(final String s1, final String s2) throws EncoderException { |
||||
return SoundexUtils.difference(this, s1, s2); |
||||
} |
||||
|
||||
/** |
||||
* Encodes an Object using the refined soundex algorithm. This method is |
||||
* provided in order to satisfy the requirements of the Encoder interface, |
||||
* and will throw an EncoderException if the supplied object is not of type |
||||
* java.lang.String. |
||||
* |
||||
* @param obj |
||||
* Object to encode |
||||
* @return An object (or type java.lang.String) containing the refined |
||||
* soundex code which corresponds to the String supplied. |
||||
* @throws EncoderException |
||||
* if the parameter supplied is not of type java.lang.String |
||||
*/ |
||||
@Override |
||||
public Object encode(final Object obj) throws EncoderException { |
||||
if (!(obj instanceof String)) { |
||||
throw new EncoderException("Parameter supplied to RefinedSoundex encode is not of type java.lang.String"); |
||||
} |
||||
return soundex((String) obj); |
||||
} |
||||
|
||||
/** |
||||
* Encodes a String using the refined soundex algorithm. |
||||
* |
||||
* @param str |
||||
* A String object to encode |
||||
* @return A Soundex code corresponding to the String supplied |
||||
*/ |
||||
@Override |
||||
public String encode(final String str) { |
||||
return soundex(str); |
||||
} |
||||
|
||||
/** |
||||
* Returns the mapping code for a given character. The mapping codes are |
||||
* maintained in an internal char array named soundexMapping, and the |
||||
* default values of these mappings are US English. |
||||
* |
||||
* @param c |
||||
* char to get mapping for |
||||
* @return A character (really a numeral) to return for the given char |
||||
*/ |
||||
char getMappingCode(final char c) { |
||||
if (!Character.isLetter(c)) { |
||||
return 0; |
||||
} |
||||
return this.soundexMapping[Character.toUpperCase(c) - 'A']; |
||||
} |
||||
|
||||
/** |
||||
* Retrieves the Refined Soundex code for a given String object. |
||||
* |
||||
* @param str |
||||
* String to encode using the Refined Soundex algorithm |
||||
* @return A soundex code for the String supplied |
||||
*/ |
||||
public String soundex(String str) { |
||||
if (str == null) { |
||||
return null; |
||||
} |
||||
str = SoundexUtils.clean(str); |
||||
if (str.length() == 0) { |
||||
return str; |
||||
} |
||||
|
||||
final StringBuilder sBuf = new StringBuilder(); |
||||
sBuf.append(str.charAt(0)); |
||||
|
||||
char last, current; |
||||
last = '*'; |
||||
|
||||
for (int i = 0; i < str.length(); i++) { |
||||
|
||||
current = getMappingCode(str.charAt(i)); |
||||
if (current == last) { |
||||
continue; |
||||
} else if (current != 0) { |
||||
sBuf.append(current); |
||||
} |
||||
|
||||
last = current; |
||||
|
||||
} |
||||
|
||||
return sBuf.toString(); |
||||
} |
||||
} |
@ -0,0 +1,337 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.language; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.EncoderException; |
||||
import com.fr.third.org.apache.commons.codec.StringEncoder; |
||||
|
||||
/** |
||||
* Encodes a string into a Soundex value. Soundex is an encoding used to relate similar names, but can also be used as a |
||||
* general purpose scheme to find word with similar phonemes. |
||||
* |
||||
* This class is thread-safe. |
||||
* Although not strictly immutable, the {@link #maxLength} field is not actually used. |
||||
* |
||||
* @version $Id: Soundex.java 1811347 2017-10-06 15:21:18Z ggregory $ |
||||
*/ |
||||
public class Soundex implements StringEncoder { |
||||
|
||||
/** |
||||
* The marker character used to indicate a silent (ignored) character. |
||||
* These are ignored except when they appear as the first character. |
||||
* <p> |
||||
* Note: the {@link #US_ENGLISH_MAPPING_STRING} does not use this mechanism |
||||
* because changing it might break existing code. Mappings that don't contain |
||||
* a silent marker code are treated as though H and W are silent. |
||||
* <p> |
||||
* To override this, use the {@link #Soundex(String, boolean)} constructor. |
||||
* @since 1.11 |
||||
*/ |
||||
public static final char SILENT_MARKER = '-'; |
||||
|
||||
/** |
||||
* This is a default mapping of the 26 letters used in US English. A value of <code>0</code> for a letter position |
||||
* means do not encode, but treat as a separator when it occurs between consonants with the same code. |
||||
* <p> |
||||
* (This constant is provided as both an implementation convenience and to allow Javadoc to pick |
||||
* up the value for the constant values page.) |
||||
* <p> |
||||
* <b>Note that letters H and W are treated specially.</b> |
||||
* They are ignored (after the first letter) and don't act as separators |
||||
* between consonants with the same code. |
||||
* @see #US_ENGLISH_MAPPING |
||||
*/ |
||||
// ABCDEFGHIJKLMNOPQRSTUVWXYZ
|
||||
public static final String US_ENGLISH_MAPPING_STRING = "01230120022455012623010202"; |
||||
|
||||
/** |
||||
* This is a default mapping of the 26 letters used in US English. A value of <code>0</code> for a letter position |
||||
* means do not encode. |
||||
* |
||||
* @see Soundex#Soundex(char[]) |
||||
*/ |
||||
private static final char[] US_ENGLISH_MAPPING = US_ENGLISH_MAPPING_STRING.toCharArray(); |
||||
|
||||
/** |
||||
* An instance of Soundex using the US_ENGLISH_MAPPING mapping. |
||||
* This treats H and W as silent letters. |
||||
* Apart from when they appear as the first letter, they are ignored. |
||||
* They don't act as separators between duplicate codes. |
||||
* |
||||
* @see #US_ENGLISH_MAPPING |
||||
* @see #US_ENGLISH_MAPPING_STRING |
||||
*/ |
||||
public static final Soundex US_ENGLISH = new Soundex(); |
||||
|
||||
/** |
||||
* An instance of Soundex using the Simplified Soundex mapping, as described here: |
||||
* http://west-penwith.org.uk/misc/soundex.htm
|
||||
* <p> |
||||
* This treats H and W the same as vowels (AEIOUY). |
||||
* Such letters aren't encoded (after the first), but they do |
||||
* act as separators when dropping duplicate codes. |
||||
* The mapping is otherwise the same as for {@link #US_ENGLISH} |
||||
* <p> |
||||
* @since 1.11 |
||||
*/ |
||||
public static final Soundex US_ENGLISH_SIMPLIFIED = new Soundex(US_ENGLISH_MAPPING_STRING, false); |
||||
|
||||
/** |
||||
* An instance of Soundex using the mapping as per the Genealogy site: |
||||
* http://www.genealogy.com/articles/research/00000060.html
|
||||
* <p> |
||||
* This treats vowels (AEIOUY), H and W as silent letters. |
||||
* Such letters are ignored (after the first) and do not |
||||
* act as separators when dropping duplicate codes. |
||||
* <p> |
||||
* The codes for consonants are otherwise the same as for |
||||
* {@link #US_ENGLISH_MAPPING_STRING} and {@link #US_ENGLISH_SIMPLIFIED} |
||||
* |
||||
* @since 1.11 |
||||
*/ |
||||
public static final Soundex US_ENGLISH_GENEALOGY = new Soundex("-123-12--22455-12623-1-2-2"); |
||||
// ABCDEFGHIJKLMNOPQRSTUVWXYZ
|
||||
|
||||
/** |
||||
* The maximum length of a Soundex code - Soundex codes are only four characters by definition. |
||||
* |
||||
* @deprecated This feature is not needed since the encoding size must be constant. Will be removed in 2.0. |
||||
*/ |
||||
@Deprecated |
||||
private int maxLength = 4; |
||||
|
||||
/** |
||||
* Every letter of the alphabet is "mapped" to a numerical value. This char array holds the values to which each |
||||
* letter is mapped. This implementation contains a default map for US_ENGLISH |
||||
*/ |
||||
private final char[] soundexMapping; |
||||
|
||||
/** |
||||
* Should H and W be treated specially? |
||||
* <p> |
||||
* In versions of the code prior to 1.11, |
||||
* the code always treated H and W as silent (ignored) letters. |
||||
* If this field is false, H and W are no longer special-cased. |
||||
*/ |
||||
private final boolean specialCaseHW; |
||||
|
||||
/** |
||||
* Creates an instance using US_ENGLISH_MAPPING |
||||
* |
||||
* @see Soundex#Soundex(char[]) |
||||
* @see Soundex#US_ENGLISH_MAPPING |
||||
*/ |
||||
public Soundex() { |
||||
this.soundexMapping = US_ENGLISH_MAPPING; |
||||
this.specialCaseHW = true; |
||||
} |
||||
|
||||
/** |
||||
* Creates a soundex instance using the given mapping. This constructor can be used to provide an internationalized |
||||
* mapping for a non-Western character set. |
||||
* |
||||
* Every letter of the alphabet is "mapped" to a numerical value. This char array holds the values to which each |
||||
* letter is mapped. This implementation contains a default map for US_ENGLISH |
||||
* <p> |
||||
* If the mapping contains an instance of {@link #SILENT_MARKER} then H and W are not given special treatment |
||||
* |
||||
* @param mapping |
||||
* Mapping array to use when finding the corresponding code for a given character |
||||
*/ |
||||
public Soundex(final char[] mapping) { |
||||
this.soundexMapping = new char[mapping.length]; |
||||
System.arraycopy(mapping, 0, this.soundexMapping, 0, mapping.length); |
||||
this.specialCaseHW = !hasMarker(this.soundexMapping); |
||||
} |
||||
|
||||
private boolean hasMarker(final char[] mapping) { |
||||
for(final char ch : mapping) { |
||||
if (ch == SILENT_MARKER) { |
||||
return true; |
||||
} |
||||
} |
||||
return false; |
||||
} |
||||
|
||||
/** |
||||
* Creates a refined soundex instance using a custom mapping. This constructor can be used to customize the mapping, |
||||
* and/or possibly provide an internationalized mapping for a non-Western character set. |
||||
* <p> |
||||
* If the mapping contains an instance of {@link #SILENT_MARKER} then H and W are not given special treatment |
||||
* |
||||
* @param mapping |
||||
* Mapping string to use when finding the corresponding code for a given character |
||||
* @since 1.4 |
||||
*/ |
||||
public Soundex(final String mapping) { |
||||
this.soundexMapping = mapping.toCharArray(); |
||||
this.specialCaseHW = !hasMarker(this.soundexMapping); |
||||
} |
||||
|
||||
/** |
||||
* Creates a refined soundex instance using a custom mapping. This constructor can be used to customize the mapping, |
||||
* and/or possibly provide an internationalized mapping for a non-Western character set. |
||||
* |
||||
* @param mapping |
||||
* Mapping string to use when finding the corresponding code for a given character |
||||
* @param specialCaseHW if true, then |
||||
* @since 1.11 |
||||
*/ |
||||
public Soundex(final String mapping, final boolean specialCaseHW) { |
||||
this.soundexMapping = mapping.toCharArray(); |
||||
this.specialCaseHW = specialCaseHW; |
||||
} |
||||
|
||||
/** |
||||
* Encodes the Strings and returns the number of characters in the two encoded Strings that are the same. This |
||||
* return value ranges from 0 through 4: 0 indicates little or no similarity, and 4 indicates strong similarity or |
||||
* identical values. |
||||
* |
||||
* @param s1 |
||||
* A String that will be encoded and compared. |
||||
* @param s2 |
||||
* A String that will be encoded and compared. |
||||
* @return The number of characters in the two encoded Strings that are the same from 0 to 4. |
||||
* |
||||
* @see SoundexUtils#difference(StringEncoder,String,String) |
||||
* @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp"> MS |
||||
* T-SQL DIFFERENCE </a> |
||||
* |
||||
* @throws EncoderException |
||||
* if an error occurs encoding one of the strings |
||||
* @since 1.3 |
||||
*/ |
||||
public int difference(final String s1, final String s2) throws EncoderException { |
||||
return SoundexUtils.difference(this, s1, s2); |
||||
} |
||||
|
||||
/** |
||||
* Encodes an Object using the soundex algorithm. This method is provided in order to satisfy the requirements of |
||||
* the Encoder interface, and will throw an EncoderException if the supplied object is not of type java.lang.String. |
||||
* |
||||
* @param obj |
||||
* Object to encode |
||||
* @return An object (or type java.lang.String) containing the soundex code which corresponds to the String |
||||
* supplied. |
||||
* @throws EncoderException |
||||
* if the parameter supplied is not of type java.lang.String |
||||
* @throws IllegalArgumentException |
||||
* if a character is not mapped |
||||
*/ |
||||
@Override |
||||
public Object encode(final Object obj) throws EncoderException { |
||||
if (!(obj instanceof String)) { |
||||
throw new EncoderException("Parameter supplied to Soundex encode is not of type java.lang.String"); |
||||
} |
||||
return soundex((String) obj); |
||||
} |
||||
|
||||
/** |
||||
* Encodes a String using the soundex algorithm. |
||||
* |
||||
* @param str |
||||
* A String object to encode |
||||
* @return A Soundex code corresponding to the String supplied |
||||
* @throws IllegalArgumentException |
||||
* if a character is not mapped |
||||
*/ |
||||
@Override |
||||
public String encode(final String str) { |
||||
return soundex(str); |
||||
} |
||||
|
||||
/** |
||||
* Returns the maxLength. Standard Soundex |
||||
* |
||||
* @deprecated This feature is not needed since the encoding size must be constant. Will be removed in 2.0. |
||||
* @return int |
||||
*/ |
||||
@Deprecated |
||||
public int getMaxLength() { |
||||
return this.maxLength; |
||||
} |
||||
|
||||
/** |
||||
* Maps the given upper-case character to its Soundex code. |
||||
* |
||||
* @param ch |
||||
* An upper-case character. |
||||
* @return A Soundex code. |
||||
* @throws IllegalArgumentException |
||||
* Thrown if <code>ch</code> is not mapped. |
||||
*/ |
||||
private char map(final char ch) { |
||||
final int index = ch - 'A'; |
||||
if (index < 0 || index >= this.soundexMapping.length) { |
||||
throw new IllegalArgumentException("The character is not mapped: " + ch + " (index=" + index + ")"); |
||||
} |
||||
return this.soundexMapping[index]; |
||||
} |
||||
|
||||
/** |
||||
* Sets the maxLength. |
||||
* |
||||
* @deprecated This feature is not needed since the encoding size must be constant. Will be removed in 2.0. |
||||
* @param maxLength |
||||
* The maxLength to set |
||||
*/ |
||||
@Deprecated |
||||
public void setMaxLength(final int maxLength) { |
||||
this.maxLength = maxLength; |
||||
} |
||||
|
||||
/** |
||||
* Retrieves the Soundex code for a given String object. |
||||
* |
||||
* @param str |
||||
* String to encode using the Soundex algorithm |
||||
* @return A soundex code for the String supplied |
||||
* @throws IllegalArgumentException |
||||
* if a character is not mapped |
||||
*/ |
||||
public String soundex(String str) { |
||||
if (str == null) { |
||||
return null; |
||||
} |
||||
str = SoundexUtils.clean(str); |
||||
if (str.length() == 0) { |
||||
return str; |
||||
} |
||||
final char out[] = {'0', '0', '0', '0'}; |
||||
int count = 0; |
||||
final char first = str.charAt(0); |
||||
out[count++] = first; |
||||
char lastDigit = map(first); // previous digit
|
||||
for(int i = 1; i < str.length() && count < out.length ; i++) { |
||||
final char ch = str.charAt(i); |
||||
if ((this.specialCaseHW) && (ch == 'H' || ch == 'W')) { // these are ignored completely
|
||||
continue; |
||||
} |
||||
final char digit = map(ch); |
||||
if (digit == SILENT_MARKER) { |
||||
continue; |
||||
} |
||||
if (digit != '0' && digit != lastDigit) { // don't store vowels or repeats
|
||||
out[count++] = digit; |
||||
} |
||||
lastDigit = digit; |
||||
} |
||||
return new String(out); |
||||
} |
||||
|
||||
} |
@ -0,0 +1,124 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.language; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.EncoderException; |
||||
import com.fr.third.org.apache.commons.codec.StringEncoder; |
||||
|
||||
/** |
||||
* Utility methods for {@link Soundex} and {@link RefinedSoundex} classes. |
||||
* |
||||
* <p>This class is immutable and thread-safe.</p> |
||||
* |
||||
* @version $Id: SoundexUtils.java 1429868 2013-01-07 16:08:05Z ggregory $ |
||||
* @since 1.3 |
||||
*/ |
||||
final class SoundexUtils { |
||||
|
||||
/** |
||||
* Cleans up the input string before Soundex processing by only returning |
||||
* upper case letters. |
||||
* |
||||
* @param str |
||||
* The String to clean. |
||||
* @return A clean String. |
||||
*/ |
||||
static String clean(final String str) { |
||||
if (str == null || str.length() == 0) { |
||||
return str; |
||||
} |
||||
final int len = str.length(); |
||||
final char[] chars = new char[len]; |
||||
int count = 0; |
||||
for (int i = 0; i < len; i++) { |
||||
if (Character.isLetter(str.charAt(i))) { |
||||
chars[count++] = str.charAt(i); |
||||
} |
||||
} |
||||
if (count == len) { |
||||
return str.toUpperCase(java.util.Locale.ENGLISH); |
||||
} |
||||
return new String(chars, 0, count).toUpperCase(java.util.Locale.ENGLISH); |
||||
} |
||||
|
||||
/** |
||||
* Encodes the Strings and returns the number of characters in the two |
||||
* encoded Strings that are the same. |
||||
* <ul> |
||||
* <li>For Soundex, this return value ranges from 0 through 4: 0 indicates |
||||
* little or no similarity, and 4 indicates strong similarity or identical |
||||
* values.</li> |
||||
* <li>For refined Soundex, the return value can be greater than 4.</li> |
||||
* </ul> |
||||
* |
||||
* @param encoder |
||||
* The encoder to use to encode the Strings. |
||||
* @param s1 |
||||
* A String that will be encoded and compared. |
||||
* @param s2 |
||||
* A String that will be encoded and compared. |
||||
* @return The number of characters in the two Soundex encoded Strings that |
||||
* are the same. |
||||
* |
||||
* @see #differenceEncoded(String,String) |
||||
* @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp"> |
||||
* MS T-SQL DIFFERENCE</a> |
||||
* |
||||
* @throws EncoderException |
||||
* if an error occurs encoding one of the strings |
||||
*/ |
||||
static int difference(final StringEncoder encoder, final String s1, final String s2) throws EncoderException { |
||||
return differenceEncoded(encoder.encode(s1), encoder.encode(s2)); |
||||
} |
||||
|
||||
/** |
||||
* Returns the number of characters in the two Soundex encoded Strings that |
||||
* are the same. |
||||
* <ul> |
||||
* <li>For Soundex, this return value ranges from 0 through 4: 0 indicates |
||||
* little or no similarity, and 4 indicates strong similarity or identical |
||||
* values.</li> |
||||
* <li>For refined Soundex, the return value can be greater than 4.</li> |
||||
* </ul> |
||||
* |
||||
* @param es1 |
||||
* An encoded String. |
||||
* @param es2 |
||||
* An encoded String. |
||||
* @return The number of characters in the two Soundex encoded Strings that |
||||
* are the same. |
||||
* |
||||
* @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp"> |
||||
* MS T-SQL DIFFERENCE</a> |
||||
*/ |
||||
static int differenceEncoded(final String es1, final String es2) { |
||||
|
||||
if (es1 == null || es2 == null) { |
||||
return 0; |
||||
} |
||||
final int lengthToMatch = Math.min(es1.length(), es2.length()); |
||||
int diff = 0; |
||||
for (int i = 0; i < lengthToMatch; i++) { |
||||
if (es1.charAt(i) == es2.charAt(i)) { |
||||
diff++; |
||||
} |
||||
} |
||||
return diff; |
||||
} |
||||
|
||||
} |
@ -0,0 +1,184 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.language.bm; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.EncoderException; |
||||
import com.fr.third.org.apache.commons.codec.StringEncoder; |
||||
|
||||
/** |
||||
* Encodes strings into their Beider-Morse phonetic encoding. |
||||
* <p> |
||||
* Beider-Morse phonetic encodings are optimised for family names. However, they may be useful for a wide range of |
||||
* words. |
||||
* <p> |
||||
* This encoder is intentionally mutable to allow dynamic configuration through bean properties. As such, it is mutable, |
||||
* and may not be thread-safe. If you require a guaranteed thread-safe encoding then use {@link PhoneticEngine} |
||||
* directly. |
||||
* <p> |
||||
* <b>Encoding overview</b> |
||||
* <p> |
||||
* Beider-Morse phonetic encoding is a multi-step process. Firstly, a table of rules is consulted to guess what |
||||
* language the word comes from. For example, if it ends in "<code>ault</code>" then it infers that the word is French. |
||||
* Next, the word is translated into a phonetic representation using a language-specific phonetics table. Some runs of |
||||
* letters can be pronounced in multiple ways, and a single run of letters may be potentially broken up into phonemes at |
||||
* different places, so this stage results in a set of possible language-specific phonetic representations. Lastly, this |
||||
* language-specific phonetic representation is processed by a table of rules that re-writes it phonetically taking into |
||||
* account systematic pronunciation differences between languages, to move it towards a pan-indo-european phonetic |
||||
* representation. Again, sometimes there are multiple ways this could be done and sometimes things that can be |
||||
* pronounced in several ways in the source language have only one way to represent them in this average phonetic |
||||
* language, so the result is again a set of phonetic spellings. |
||||
* <p> |
||||
* Some names are treated as having multiple parts. This can be due to two things. Firstly, they may be hyphenated. In |
||||
* this case, each individual hyphenated word is encoded, and then these are combined end-to-end for the final encoding. |
||||
* Secondly, some names have standard prefixes, for example, "<code>Mac/Mc</code>" in Scottish (English) names. As |
||||
* sometimes it is ambiguous whether the prefix is intended or is an accident of the spelling, the word is encoded once |
||||
* with the prefix and once without it. The resulting encoding contains one and then the other result. |
||||
* <p> |
||||
* <b>Encoding format</b> |
||||
* <p> |
||||
* Individual phonetic spellings of an input word are represented in upper- and lower-case roman characters. Where there |
||||
* are multiple possible phonetic representations, these are joined with a pipe (<code>|</code>) character. If multiple |
||||
* hyphenated words were found, or if the word may contain a name prefix, each encoded word is placed in parentheses and |
||||
* these blocks are then joined with hyphens. For example, "<code>d'ortley</code>" has a possible prefix. The form |
||||
* without prefix encodes to "<code>ortlaj|ortlej</code>", while the form with prefix encodes to " |
||||
* <code>dortlaj|dortlej</code>". Thus, the full, combined encoding is "<code>(ortlaj|ortlej)-(dortlaj|dortlej)</code>". |
||||
* <p> |
||||
* The encoded forms are often quite a bit longer than the input strings. This is because a single input may have many |
||||
* potential phonetic interpretations. For example, "<code>Renault</code>" encodes to " |
||||
* <code>rYnDlt|rYnalt|rYnult|rinDlt|rinalt|rinult</code>". The <code>APPROX</code> rules will tend to produce larger |
||||
* encodings as they consider a wider range of possible, approximate phonetic interpretations of the original word. |
||||
* Down-stream applications may wish to further process the encoding for indexing or lookup purposes, for example, by |
||||
* splitting on pipe (<code>|</code>) and indexing under each of these alternatives. |
||||
* <p> |
||||
* <b>Note</b>: this version of the Beider-Morse encoding is equivalent with v3.4 of the reference implementation. |
||||
* </p> |
||||
* @see <a href="http://stevemorse.org/phonetics/bmpm.htm">Beider-Morse Phonetic Matching</a> |
||||
* @see <a href="http://stevemorse.org/phoneticinfo.htm">Reference implementation</a> |
||||
* |
||||
* <p> |
||||
* This class is Not ThreadSafe |
||||
* </p> |
||||
* @since 1.6 |
||||
* @version $Id: BeiderMorseEncoder.java 1744724 2016-05-20 12:24:04Z sebb $ |
||||
*/ |
||||
public class BeiderMorseEncoder implements StringEncoder { |
||||
// Implementation note: This class is a spring-friendly facade to PhoneticEngine. It allows read/write configuration
|
||||
// of an immutable PhoneticEngine instance that will be delegated to for the actual encoding.
|
||||
|
||||
// a cached object
|
||||
private PhoneticEngine engine = new PhoneticEngine(NameType.GENERIC, RuleType.APPROX, true); |
||||
|
||||
@Override |
||||
public Object encode(final Object source) throws EncoderException { |
||||
if (!(source instanceof String)) { |
||||
throw new EncoderException("BeiderMorseEncoder encode parameter is not of type String"); |
||||
} |
||||
return encode((String) source); |
||||
} |
||||
|
||||
@Override |
||||
public String encode(final String source) throws EncoderException { |
||||
if (source == null) { |
||||
return null; |
||||
} |
||||
return this.engine.encode(source); |
||||
} |
||||
|
||||
/** |
||||
* Gets the name type currently in operation. |
||||
* |
||||
* @return the NameType currently being used |
||||
*/ |
||||
public NameType getNameType() { |
||||
return this.engine.getNameType(); |
||||
} |
||||
|
||||
/** |
||||
* Gets the rule type currently in operation. |
||||
* |
||||
* @return the RuleType currently being used |
||||
*/ |
||||
public RuleType getRuleType() { |
||||
return this.engine.getRuleType(); |
||||
} |
||||
|
||||
/** |
||||
* Discovers if multiple possible encodings are concatenated. |
||||
* |
||||
* @return true if multiple encodings are concatenated, false if just the first one is returned |
||||
*/ |
||||
public boolean isConcat() { |
||||
return this.engine.isConcat(); |
||||
} |
||||
|
||||
/** |
||||
* Sets how multiple possible phonetic encodings are combined. |
||||
* |
||||
* @param concat |
||||
* true if multiple encodings are to be combined with a '|', false if just the first one is |
||||
* to be considered |
||||
*/ |
||||
public void setConcat(final boolean concat) { |
||||
this.engine = new PhoneticEngine(this.engine.getNameType(), |
||||
this.engine.getRuleType(), |
||||
concat, |
||||
this.engine.getMaxPhonemes()); |
||||
} |
||||
|
||||
/** |
||||
* Sets the type of name. Use {@link NameType#GENERIC} unless you specifically want phonetic encodings |
||||
* optimized for Ashkenazi or Sephardic Jewish family names. |
||||
* |
||||
* @param nameType |
||||
* the NameType in use |
||||
*/ |
||||
public void setNameType(final NameType nameType) { |
||||
this.engine = new PhoneticEngine(nameType, |
||||
this.engine.getRuleType(), |
||||
this.engine.isConcat(), |
||||
this.engine.getMaxPhonemes()); |
||||
} |
||||
|
||||
/** |
||||
* Sets the rule type to apply. This will widen or narrow the range of phonetic encodings considered. |
||||
* |
||||
* @param ruleType |
||||
* {@link RuleType#APPROX} or {@link RuleType#EXACT} for approximate or exact phonetic matches |
||||
*/ |
||||
public void setRuleType(final RuleType ruleType) { |
||||
this.engine = new PhoneticEngine(this.engine.getNameType(), |
||||
ruleType, |
||||
this.engine.isConcat(), |
||||
this.engine.getMaxPhonemes()); |
||||
} |
||||
|
||||
/** |
||||
* Sets the number of maximum of phonemes that shall be considered by the engine. |
||||
* |
||||
* @param maxPhonemes |
||||
* the maximum number of phonemes returned by the engine |
||||
* @since 1.7 |
||||
*/ |
||||
public void setMaxPhonemes(final int maxPhonemes) { |
||||
this.engine = new PhoneticEngine(this.engine.getNameType(), |
||||
this.engine.getRuleType(), |
||||
this.engine.isConcat(), |
||||
maxPhonemes); |
||||
} |
||||
|
||||
} |
@ -0,0 +1,231 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.language.bm; |
||||
|
||||
import java.io.InputStream; |
||||
import java.util.ArrayList; |
||||
import java.util.Arrays; |
||||
import java.util.Collections; |
||||
import java.util.EnumMap; |
||||
import java.util.HashSet; |
||||
import java.util.List; |
||||
import java.util.Locale; |
||||
import java.util.Map; |
||||
import java.util.Scanner; |
||||
import java.util.Set; |
||||
import java.util.regex.Pattern; |
||||
|
||||
/** |
||||
* Language guessing utility. |
||||
* <p> |
||||
* This class encapsulates rules used to guess the possible languages that a word originates from. This is |
||||
* done by reference to a whole series of rules distributed in resource files. |
||||
* <p> |
||||
* Instances of this class are typically managed through the static factory method instance(). |
||||
* Unless you are developing your own language guessing rules, you will not need to interact with this class directly. |
||||
* <p> |
||||
* This class is intended to be immutable and thread-safe. |
||||
* <p> |
||||
* <b>Lang resources</b> |
||||
* <p> |
||||
* Language guessing rules are typically loaded from resource files. These are UTF-8 encoded text files. |
||||
* They are systematically named following the pattern: |
||||
* <blockquote>com/fr/third/org/apache/commons/codec/language/bm/${{@link NameType#getName()}}_lang.txt</blockquote> |
||||
* The format of these resources is the following: |
||||
* <ul> |
||||
* <li><b>Rules:</b> whitespace separated strings. |
||||
* There should be 3 columns to each row, and these will be interpreted as: |
||||
* <ol> |
||||
* <li>pattern: a regular expression.</li> |
||||
* <li>languages: a '+'-separated list of languages.</li> |
||||
* <li>acceptOnMatch: 'true' or 'false' indicating if a match rules in or rules out the language.</li> |
||||
* </ol> |
||||
* </li> |
||||
* <li><b>End-of-line comments:</b> Any occurrence of '//' will cause all text following on that line to be
|
||||
* discarded as a comment.</li> |
||||
* <li><b>Multi-line comments:</b> Any line starting with '/*' will start multi-line commenting mode. |
||||
* This will skip all content until a line ending in '*' and '/' is found.</li> |
||||
* <li><b>Blank lines:</b> All blank lines will be skipped.</li> |
||||
* </ul> |
||||
* <p> |
||||
* Port of lang.php |
||||
* |
||||
* @since 1.6 |
||||
* @version $Id: Lang.java 1608115 2014-07-05 19:58:38Z tn $ |
||||
*/ |
||||
public class Lang { |
||||
// Implementation note: This class is divided into two sections. The first part is a static factory interface that
|
||||
// exposes the LANGUAGE_RULES_RN resource as a Lang instance. The second part is the Lang instance methods that
|
||||
// encapsulate a particular language-guessing rule table and the language guessing itself.
|
||||
//
|
||||
// It may make sense in the future to expose the private constructor to allow power users to build custom language-
|
||||
// guessing rules, perhaps by marking it protected and allowing sub-classing. However, the vast majority of users
|
||||
// should be strongly encouraged to use the static factory <code>instance</code> method to get their Lang instances.
|
||||
|
||||
private static final class LangRule { |
||||
private final boolean acceptOnMatch; |
||||
private final Set<String> languages; |
||||
private final Pattern pattern; |
||||
|
||||
private LangRule(final Pattern pattern, final Set<String> languages, final boolean acceptOnMatch) { |
||||
this.pattern = pattern; |
||||
this.languages = languages; |
||||
this.acceptOnMatch = acceptOnMatch; |
||||
} |
||||
|
||||
public boolean matches(final String txt) { |
||||
return this.pattern.matcher(txt).find(); |
||||
} |
||||
} |
||||
|
||||
private static final Map<NameType, Lang> Langs = new EnumMap<NameType, Lang>(NameType.class); |
||||
|
||||
private static final String LANGUAGE_RULES_RN = "com/fr/third/org/apache/commons/codec/language/bm/%s_lang.txt"; |
||||
|
||||
static { |
||||
for (final NameType s : NameType.values()) { |
||||
Langs.put(s, loadFromResource(String.format(LANGUAGE_RULES_RN, s.getName()), Languages.getInstance(s))); |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Gets a Lang instance for one of the supported NameTypes. |
||||
* |
||||
* @param nameType |
||||
* the NameType to look up |
||||
* @return a Lang encapsulating the language guessing rules for that name type |
||||
*/ |
||||
public static Lang instance(final NameType nameType) { |
||||
return Langs.get(nameType); |
||||
} |
||||
|
||||
/** |
||||
* Loads language rules from a resource. |
||||
* <p> |
||||
* In normal use, you will obtain instances of Lang through the {@link #instance(NameType)} method. |
||||
* You will only need to call this yourself if you are developing custom language mapping rules. |
||||
* |
||||
* @param languageRulesResourceName |
||||
* the fully-qualified resource name to load |
||||
* @param languages |
||||
* the languages that these rules will support |
||||
* @return a Lang encapsulating the loaded language-guessing rules. |
||||
*/ |
||||
public static Lang loadFromResource(final String languageRulesResourceName, final Languages languages) { |
||||
final List<LangRule> rules = new ArrayList<LangRule>(); |
||||
final InputStream lRulesIS = Lang.class.getClassLoader().getResourceAsStream(languageRulesResourceName); |
||||
|
||||
if (lRulesIS == null) { |
||||
throw new IllegalStateException("Unable to resolve required resource:" + LANGUAGE_RULES_RN); |
||||
} |
||||
|
||||
final Scanner scanner = new Scanner(lRulesIS, ResourceConstants.ENCODING); |
||||
try { |
||||
boolean inExtendedComment = false; |
||||
while (scanner.hasNextLine()) { |
||||
final String rawLine = scanner.nextLine(); |
||||
String line = rawLine; |
||||
if (inExtendedComment) { |
||||
// check for closing comment marker, otherwise discard doc comment line
|
||||
if (line.endsWith(ResourceConstants.EXT_CMT_END)) { |
||||
inExtendedComment = false; |
||||
} |
||||
} else { |
||||
if (line.startsWith(ResourceConstants.EXT_CMT_START)) { |
||||
inExtendedComment = true; |
||||
} else { |
||||
// discard comments
|
||||
final int cmtI = line.indexOf(ResourceConstants.CMT); |
||||
if (cmtI >= 0) { |
||||
line = line.substring(0, cmtI); |
||||
} |
||||
|
||||
// trim leading-trailing whitespace
|
||||
line = line.trim(); |
||||
|
||||
if (line.length() == 0) { |
||||
continue; // empty lines can be safely skipped
|
||||
} |
||||
|
||||
// split it up
|
||||
final String[] parts = line.split("\\s+"); |
||||
|
||||
if (parts.length != 3) { |
||||
throw new IllegalArgumentException("Malformed line '" + rawLine + |
||||
"' in language resource '" + languageRulesResourceName + "'"); |
||||
} |
||||
|
||||
final Pattern pattern = Pattern.compile(parts[0]); |
||||
final String[] langs = parts[1].split("\\+"); |
||||
final boolean accept = parts[2].equals("true"); |
||||
|
||||
rules.add(new LangRule(pattern, new HashSet<String>(Arrays.asList(langs)), accept)); |
||||
} |
||||
} |
||||
} |
||||
} finally { |
||||
scanner.close(); |
||||
} |
||||
return new Lang(rules, languages); |
||||
} |
||||
|
||||
private final Languages languages; |
||||
private final List<LangRule> rules; |
||||
|
||||
private Lang(final List<LangRule> rules, final Languages languages) { |
||||
this.rules = Collections.unmodifiableList(rules); |
||||
this.languages = languages; |
||||
} |
||||
|
||||
/** |
||||
* Guesses the language of a word. |
||||
* |
||||
* @param text |
||||
* the word |
||||
* @return the language that the word originates from or {@link Languages#ANY} if there was no unique match |
||||
*/ |
||||
public String guessLanguage(final String text) { |
||||
final Languages.LanguageSet ls = guessLanguages(text); |
||||
return ls.isSingleton() ? ls.getAny() : Languages.ANY; |
||||
} |
||||
|
||||
/** |
||||
* Guesses the languages of a word. |
||||
* |
||||
* @param input |
||||
* the word |
||||
* @return a Set of Strings of language names that are potential matches for the input word |
||||
*/ |
||||
public Languages.LanguageSet guessLanguages(final String input) { |
||||
final String text = input.toLowerCase(Locale.ENGLISH); |
||||
|
||||
final Set<String> langs = new HashSet<String>(this.languages.getLanguages()); |
||||
for (final LangRule rule : this.rules) { |
||||
if (rule.matches(text)) { |
||||
if (rule.acceptOnMatch) { |
||||
langs.retainAll(rule.languages); |
||||
} else { |
||||
langs.removeAll(rule.languages); |
||||
} |
||||
} |
||||
} |
||||
|
||||
final Languages.LanguageSet ls = Languages.LanguageSet.from(langs); |
||||
return ls.equals(Languages.NO_LANGUAGES) ? Languages.ANY_LANGUAGE : ls; |
||||
} |
||||
} |
@ -0,0 +1,295 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.language.bm; |
||||
|
||||
import java.io.InputStream; |
||||
import java.util.Collections; |
||||
import java.util.EnumMap; |
||||
import java.util.HashSet; |
||||
import java.util.Map; |
||||
import java.util.NoSuchElementException; |
||||
import java.util.Scanner; |
||||
import java.util.Set; |
||||
|
||||
/** |
||||
* Language codes. |
||||
* <p> |
||||
* Language codes are typically loaded from resource files. These are UTF-8 encoded text files. They are |
||||
* systematically named following the pattern: |
||||
* <blockquote>com/fr/third/org/apache/commons/codec/language/bm/${{@link NameType#getName()}}_languages.txt</blockquote> |
||||
* <p> |
||||
* The format of these resources is the following: |
||||
* <ul> |
||||
* <li><b>Language:</b> a single string containing no whitespace</li> |
||||
* <li><b>End-of-line comments:</b> Any occurrence of '//' will cause all text following on that line to be
|
||||
* discarded as a comment.</li> |
||||
* <li><b>Multi-line comments:</b> Any line starting with '/*' will start multi-line commenting mode. |
||||
* This will skip all content until a line ending in '*' and '/' is found.</li> |
||||
* <li><b>Blank lines:</b> All blank lines will be skipped.</li> |
||||
* </ul> |
||||
* <p> |
||||
* Ported from language.php |
||||
* <p> |
||||
* This class is immutable and thread-safe. |
||||
* |
||||
* @since 1.6 |
||||
* @version $Id: Languages.java 1694610 2015-08-07 03:47:38Z ggregory $ |
||||
*/ |
||||
public class Languages { |
||||
// Implementation note: This class is divided into two sections. The first part is a static factory interface that
|
||||
// exposes org/apache/commons/codec/language/bm/%s_languages.txt for %s in NameType.* as a list of supported
|
||||
// languages, and a second part that provides instance methods for accessing this set for supported languages.
|
||||
|
||||
/** |
||||
* A set of languages. |
||||
*/ |
||||
public static abstract class LanguageSet { |
||||
|
||||
public static LanguageSet from(final Set<String> langs) { |
||||
return langs.isEmpty() ? NO_LANGUAGES : new SomeLanguages(langs); |
||||
} |
||||
|
||||
public abstract boolean contains(String language); |
||||
|
||||
public abstract String getAny(); |
||||
|
||||
public abstract boolean isEmpty(); |
||||
|
||||
public abstract boolean isSingleton(); |
||||
|
||||
public abstract LanguageSet restrictTo(LanguageSet other); |
||||
|
||||
abstract LanguageSet merge(LanguageSet other); |
||||
} |
||||
|
||||
/** |
||||
* Some languages, explicitly enumerated. |
||||
*/ |
||||
public static final class SomeLanguages extends LanguageSet { |
||||
private final Set<String> languages; |
||||
|
||||
private SomeLanguages(final Set<String> languages) { |
||||
this.languages = Collections.unmodifiableSet(languages); |
||||
} |
||||
|
||||
@Override |
||||
public boolean contains(final String language) { |
||||
return this.languages.contains(language); |
||||
} |
||||
|
||||
@Override |
||||
public String getAny() { |
||||
return this.languages.iterator().next(); |
||||
} |
||||
|
||||
public Set<String> getLanguages() { |
||||
return this.languages; |
||||
} |
||||
|
||||
@Override |
||||
public boolean isEmpty() { |
||||
return this.languages.isEmpty(); |
||||
} |
||||
|
||||
@Override |
||||
public boolean isSingleton() { |
||||
return this.languages.size() == 1; |
||||
} |
||||
|
||||
@Override |
||||
public LanguageSet restrictTo(final LanguageSet other) { |
||||
if (other == NO_LANGUAGES) { |
||||
return other; |
||||
} else if (other == ANY_LANGUAGE) { |
||||
return this; |
||||
} else { |
||||
final SomeLanguages sl = (SomeLanguages) other; |
||||
final Set<String> ls = new HashSet<String>(Math.min(languages.size(), sl.languages.size())); |
||||
for (final String lang : languages) { |
||||
if (sl.languages.contains(lang)) { |
||||
ls.add(lang); |
||||
} |
||||
} |
||||
return from(ls); |
||||
} |
||||
} |
||||
|
||||
@Override |
||||
public LanguageSet merge(final LanguageSet other) { |
||||
if (other == NO_LANGUAGES) { |
||||
return this; |
||||
} else if (other == ANY_LANGUAGE) { |
||||
return other; |
||||
} else { |
||||
final SomeLanguages sl = (SomeLanguages) other; |
||||
final Set<String> ls = new HashSet<String>(languages); |
||||
for (final String lang : sl.languages) { |
||||
ls.add(lang); |
||||
} |
||||
return from(ls); |
||||
} |
||||
} |
||||
|
||||
@Override |
||||
public String toString() { |
||||
return "Languages(" + languages.toString() + ")"; |
||||
} |
||||
|
||||
} |
||||
|
||||
public static final String ANY = "any"; |
||||
|
||||
private static final Map<NameType, Languages> LANGUAGES = new EnumMap<NameType, Languages>(NameType.class); |
||||
|
||||
static { |
||||
for (final NameType s : NameType.values()) { |
||||
LANGUAGES.put(s, getInstance(langResourceName(s))); |
||||
} |
||||
} |
||||
|
||||
public static Languages getInstance(final NameType nameType) { |
||||
return LANGUAGES.get(nameType); |
||||
} |
||||
|
||||
public static Languages getInstance(final String languagesResourceName) { |
||||
// read languages list
|
||||
final Set<String> ls = new HashSet<String>(); |
||||
final InputStream langIS = Languages.class.getClassLoader().getResourceAsStream(languagesResourceName); |
||||
|
||||
if (langIS == null) { |
||||
throw new IllegalArgumentException("Unable to resolve required resource: " + languagesResourceName); |
||||
} |
||||
|
||||
final Scanner lsScanner = new Scanner(langIS, ResourceConstants.ENCODING); |
||||
try { |
||||
boolean inExtendedComment = false; |
||||
while (lsScanner.hasNextLine()) { |
||||
final String line = lsScanner.nextLine().trim(); |
||||
if (inExtendedComment) { |
||||
if (line.endsWith(ResourceConstants.EXT_CMT_END)) { |
||||
inExtendedComment = false; |
||||
} |
||||
} else { |
||||
if (line.startsWith(ResourceConstants.EXT_CMT_START)) { |
||||
inExtendedComment = true; |
||||
} else if (line.length() > 0) { |
||||
ls.add(line); |
||||
} |
||||
} |
||||
} |
||||
} finally { |
||||
lsScanner.close(); |
||||
} |
||||
|
||||
return new Languages(Collections.unmodifiableSet(ls)); |
||||
} |
||||
|
||||
private static String langResourceName(final NameType nameType) { |
||||
return String.format("com/fr/third/org/apache/commons/codec/language/bm/%s_languages.txt", nameType.getName()); |
||||
} |
||||
|
||||
private final Set<String> languages; |
||||
|
||||
/** |
||||
* No languages at all. |
||||
*/ |
||||
public static final LanguageSet NO_LANGUAGES = new LanguageSet() { |
||||
@Override |
||||
public boolean contains(final String language) { |
||||
return false; |
||||
} |
||||
|
||||
@Override |
||||
public String getAny() { |
||||
throw new NoSuchElementException("Can't fetch any language from the empty language set."); |
||||
} |
||||
|
||||
@Override |
||||
public boolean isEmpty() { |
||||
return true; |
||||
} |
||||
|
||||
@Override |
||||
public boolean isSingleton() { |
||||
return false; |
||||
} |
||||
|
||||
@Override |
||||
public LanguageSet restrictTo(final LanguageSet other) { |
||||
return this; |
||||
} |
||||
|
||||
@Override |
||||
public LanguageSet merge(final LanguageSet other) { |
||||
return other; |
||||
} |
||||
|
||||
@Override |
||||
public String toString() { |
||||
return "NO_LANGUAGES"; |
||||
} |
||||
}; |
||||
|
||||
/** |
||||
* Any/all languages. |
||||
*/ |
||||
public static final LanguageSet ANY_LANGUAGE = new LanguageSet() { |
||||
@Override |
||||
public boolean contains(final String language) { |
||||
return true; |
||||
} |
||||
|
||||
@Override |
||||
public String getAny() { |
||||
throw new NoSuchElementException("Can't fetch any language from the any language set."); |
||||
} |
||||
|
||||
@Override |
||||
public boolean isEmpty() { |
||||
return false; |
||||
} |
||||
|
||||
@Override |
||||
public boolean isSingleton() { |
||||
return false; |
||||
} |
||||
|
||||
@Override |
||||
public LanguageSet restrictTo(final LanguageSet other) { |
||||
return other; |
||||
} |
||||
|
||||
@Override |
||||
public LanguageSet merge(final LanguageSet other) { |
||||
return other; |
||||
} |
||||
|
||||
@Override |
||||
public String toString() { |
||||
return "ANY_LANGUAGE"; |
||||
} |
||||
}; |
||||
|
||||
private Languages(final Set<String> languages) { |
||||
this.languages = languages; |
||||
} |
||||
|
||||
public Set<String> getLanguages() { |
||||
return this.languages; |
||||
} |
||||
} |
@ -0,0 +1,53 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.language.bm; |
||||
|
||||
/** |
||||
* Supported types of names. Unless you are matching particular family names, use {@link #GENERIC}. The |
||||
* <code>GENERIC</code> NameType should work reasonably well for non-name words. The other encodings are |
||||
* specifically tuned to family names, and may not work well at all for general text. |
||||
* |
||||
* @since 1.6 |
||||
* @version $Id: NameType.java 1429868 2013-01-07 16:08:05Z ggregory $ |
||||
*/ |
||||
public enum NameType {

    /** Ashkenazi family names */
    ASHKENAZI("ash"),

    /** Generic names and words */
    GENERIC("gen"),

    /** Sephardic family names */
    SEPHARDIC("sep");

    /** Short identifier of this name type, as handed out by {@link #getName()}. */
    private final String shortName;

    NameType(final String shortName) {
        this.shortName = shortName;
    }

    /**
     * Gets the short version of the name type.
     *
     * @return the NameType short string
     */
    public String getName() {
        return shortName;
    }
}
@ -0,0 +1,528 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.language.bm; |
||||
|
||||
import java.util.ArrayList; |
||||
import java.util.Arrays; |
||||
import java.util.Collections; |
||||
import java.util.EnumMap; |
||||
import java.util.HashSet; |
||||
import java.util.Iterator; |
||||
import java.util.LinkedHashSet; |
||||
import java.util.List; |
||||
import java.util.Locale; |
||||
import java.util.Map; |
||||
import java.util.Set; |
||||
import java.util.TreeMap; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.language.bm.Languages.LanguageSet; |
||||
|
||||
/** |
||||
* Converts words into potential phonetic representations. |
||||
* <p> |
||||
* This is a two-stage process. Firstly, the word is converted into a phonetic representation that takes |
||||
* into account the likely source language. Next, this phonetic representation is converted into a |
||||
* pan-European 'average' representation, allowing comparison between different versions of essentially |
||||
* the same word from different languages. |
||||
* <p> |
||||
* This class is intentionally immutable and thread-safe. |
||||
* If you wish to alter the settings for a PhoneticEngine, you |
||||
* must make a new one with the updated settings. |
||||
* <p> |
||||
* Ported from phoneticengine.php |
||||
* |
||||
* @since 1.6 |
||||
* @version $Id: PhoneticEngine.java 1694610 2015-08-07 03:47:38Z ggregory $ |
||||
*/ |
||||
public class PhoneticEngine { |
||||
|
||||
/** |
||||
* Utility for manipulating a set of phonemes as they are being built up. Not intended for use outside |
||||
* this package, and probably not outside the {@link PhoneticEngine} class. |
||||
* |
||||
* @since 1.6 |
||||
*/ |
||||
static final class PhonemeBuilder { |
||||
|
||||
/** |
||||
* An empty builder where all phonemes must come from some set of languages. This will contain a single |
||||
* phoneme of zero characters. This can then be appended to. This should be the only way to create a new |
||||
* phoneme from scratch. |
||||
* |
||||
* @param languages the set of languages |
||||
* @return a new, empty phoneme builder |
||||
*/ |
||||
public static PhonemeBuilder empty(final LanguageSet languages) { |
||||
return new PhonemeBuilder(new Rule.Phoneme("", languages)); |
||||
} |
||||
|
||||
private final Set<Rule.Phoneme> phonemes; |
||||
|
||||
private PhonemeBuilder(final Rule.Phoneme phoneme) { |
||||
this.phonemes = new LinkedHashSet<Rule.Phoneme>(); |
||||
this.phonemes.add(phoneme); |
||||
} |
||||
|
||||
private PhonemeBuilder(final Set<Rule.Phoneme> phonemes) { |
||||
this.phonemes = phonemes; |
||||
} |
||||
|
||||
/** |
||||
* Creates a new phoneme builder containing all phonemes in this one extended by <code>str</code>. |
||||
* |
||||
* @param str the characters to append to the phonemes |
||||
*/ |
||||
public void append(final CharSequence str) { |
||||
for (final Rule.Phoneme ph : this.phonemes) { |
||||
ph.append(str); |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Applies the given phoneme expression to all phonemes in this phoneme builder. |
||||
* <p> |
||||
* This will lengthen phonemes that have compatible language sets to the expression, and drop those that are |
||||
* incompatible. |
||||
* |
||||
* @param phonemeExpr the expression to apply |
||||
* @param maxPhonemes the maximum number of phonemes to build up |
||||
*/ |
||||
public void apply(final Rule.PhonemeExpr phonemeExpr, final int maxPhonemes) { |
||||
final Set<Rule.Phoneme> newPhonemes = new LinkedHashSet<Rule.Phoneme>(maxPhonemes); |
||||
|
||||
EXPR: for (final Rule.Phoneme left : this.phonemes) { |
||||
for (final Rule.Phoneme right : phonemeExpr.getPhonemes()) { |
||||
final LanguageSet languages = left.getLanguages().restrictTo(right.getLanguages()); |
||||
if (!languages.isEmpty()) { |
||||
final Rule.Phoneme join = new Rule.Phoneme(left, right, languages); |
||||
if (newPhonemes.size() < maxPhonemes) { |
||||
newPhonemes.add(join); |
||||
if (newPhonemes.size() >= maxPhonemes) { |
||||
break EXPR; |
||||
} |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
this.phonemes.clear(); |
||||
this.phonemes.addAll(newPhonemes); |
||||
} |
||||
|
||||
/** |
||||
* Gets underlying phoneme set. Please don't mutate. |
||||
* |
||||
* @return the phoneme set |
||||
*/ |
||||
public Set<Rule.Phoneme> getPhonemes() { |
||||
return this.phonemes; |
||||
} |
||||
|
||||
/** |
||||
* Stringifies the phoneme set. This produces a single string of the strings of each phoneme, |
||||
* joined with a pipe. This is explicitly provided in place of toString as it is a potentially |
||||
* expensive operation, which should be avoided when debugging. |
||||
* |
||||
* @return the stringified phoneme set |
||||
*/ |
||||
public String makeString() { |
||||
final StringBuilder sb = new StringBuilder(); |
||||
|
||||
for (final Rule.Phoneme ph : this.phonemes) { |
||||
if (sb.length() > 0) { |
||||
sb.append("|"); |
||||
} |
||||
sb.append(ph.getPhonemeText()); |
||||
} |
||||
|
||||
return sb.toString(); |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* A function closure capturing the application of a list of rules to an input sequence at a particular offset. |
||||
* After invocation, the values <code>i</code> and <code>found</code> are updated. <code>i</code> points to the |
||||
* index of the next char in <code>input</code> that must be processed next (the input up to that index having been |
||||
* processed already), and <code>found</code> indicates if a matching rule was found or not. In the case where a |
||||
* matching rule was found, <code>phonemeBuilder</code> is replaced with a new builder containing the phonemes |
||||
* updated by the matching rule. |
||||
* |
||||
* Although this class is not thread-safe (it has mutable unprotected fields), it is not shared between threads |
||||
* as it is constructed as needed by the calling methods. |
||||
* @since 1.6 |
||||
*/ |
||||
private static final class RulesApplication { |
||||
private final Map<String, List<Rule>> finalRules; |
||||
private final CharSequence input; |
||||
|
||||
private final PhonemeBuilder phonemeBuilder; |
||||
private int i; |
||||
private final int maxPhonemes; |
||||
private boolean found; |
||||
|
||||
public RulesApplication(final Map<String, List<Rule>> finalRules, final CharSequence input, |
||||
final PhonemeBuilder phonemeBuilder, final int i, final int maxPhonemes) { |
||||
if (finalRules == null) { |
||||
throw new NullPointerException("The finalRules argument must not be null"); |
||||
} |
||||
this.finalRules = finalRules; |
||||
this.phonemeBuilder = phonemeBuilder; |
||||
this.input = input; |
||||
this.i = i; |
||||
this.maxPhonemes = maxPhonemes; |
||||
} |
||||
|
||||
public int getI() { |
||||
return this.i; |
||||
} |
||||
|
||||
public PhonemeBuilder getPhonemeBuilder() { |
||||
return this.phonemeBuilder; |
||||
} |
||||
|
||||
/** |
||||
* Invokes the rules. Loops over the rules list, stopping at the first one that has a matching context |
||||
* and pattern. Then applies this rule to the phoneme builder to produce updated phonemes. If there was no |
||||
* match, <code>i</code> is advanced one and the character is silently dropped from the phonetic spelling. |
||||
* |
||||
* @return <code>this</code> |
||||
*/ |
||||
public RulesApplication invoke() { |
||||
this.found = false; |
||||
int patternLength = 1; |
||||
final List<Rule> rules = this.finalRules.get(input.subSequence(i, i+patternLength)); |
||||
if (rules != null) { |
||||
for (final Rule rule : rules) { |
||||
final String pattern = rule.getPattern(); |
||||
patternLength = pattern.length(); |
||||
if (rule.patternAndContextMatches(this.input, this.i)) { |
||||
this.phonemeBuilder.apply(rule.getPhoneme(), maxPhonemes); |
||||
this.found = true; |
||||
break; |
||||
} |
||||
} |
||||
} |
||||
|
||||
if (!this.found) { |
||||
patternLength = 1; |
||||
} |
||||
|
||||
this.i += patternLength; |
||||
return this; |
||||
} |
||||
|
||||
public boolean isFound() { |
||||
return this.found; |
||||
} |
||||
} |
||||
|
||||
/** Name prefixes registered for each NameType; every value is an unmodifiable set. */
private static final Map<NameType, Set<String>> NAME_PREFIXES = new EnumMap<NameType, Set<String>>(NameType.class);

static {
    final List<String> ashkenazi = Arrays.asList("bar", "ben", "da", "de", "van", "von");
    final List<String> sephardic = Arrays.asList("al", "el", "da", "dal", "de", "del", "dela", "de la",
            "della", "des", "di", "do", "dos", "du", "van", "von");
    final List<String> generic = Arrays.asList("da", "dal", "de", "del", "dela", "de la", "della",
            "des", "di", "do", "dos", "du", "van", "von");

    NAME_PREFIXES.put(NameType.ASHKENAZI, Collections.unmodifiableSet(new HashSet<String>(ashkenazi)));
    NAME_PREFIXES.put(NameType.SEPHARDIC, Collections.unmodifiableSet(new HashSet<String>(sephardic)));
    NAME_PREFIXES.put(NameType.GENERIC, Collections.unmodifiableSet(new HashSet<String>(generic)));
}
||||
|
||||
/** |
||||
* Joins some strings with an internal separator. |
||||
* @param strings Strings to join |
||||
* @param sep String to separate them with |
||||
* @return a single String consisting of each element of <code>strings</code> interleaved by <code>sep</code> |
||||
*/ |
||||
private static String join(final Iterable<String> strings, final String sep) { |
||||
final StringBuilder sb = new StringBuilder(); |
||||
final Iterator<String> si = strings.iterator(); |
||||
if (si.hasNext()) { |
||||
sb.append(si.next()); |
||||
} |
||||
while (si.hasNext()) { |
||||
sb.append(sep).append(si.next()); |
||||
} |
||||
|
||||
return sb.toString(); |
||||
} |
||||
|
||||
/** Phoneme limit used by the constructor that does not take one. */
private static final int DEFAULT_MAX_PHONEMES = 20;

/** Language guesser obtained from {@link Lang#instance(NameType)} for this engine's name type. */
private final Lang lang;

private final NameType nameType;

private final RuleType ruleType;

private final boolean concat;

private final int maxPhonemes;

/**
 * Generates a new, fully-configured phonetic engine that uses the default phoneme limit.
 *
 * @param nameType
 *            the type of names it will use
 * @param ruleType
 *            the type of rules it will apply
 * @param concat
 *            if it will concatenate multiple encodings
 * @throws IllegalArgumentException
 *             if <code>ruleType</code> is {@link RuleType#RULES}
 */
public PhoneticEngine(final NameType nameType, final RuleType ruleType, final boolean concat) {
    this(nameType, ruleType, concat, DEFAULT_MAX_PHONEMES);
}

/**
 * Generates a new, fully-configured phonetic engine.
 *
 * @param nameType
 *            the type of names it will use
 * @param ruleType
 *            the type of rules it will apply
 * @param concat
 *            if it will concatenate multiple encodings
 * @param maxPhonemes
 *            the maximum number of phonemes that will be handled
 * @throws IllegalArgumentException
 *             if <code>ruleType</code> is {@link RuleType#RULES}
 * @since 1.7
 */
public PhoneticEngine(final NameType nameType, final RuleType ruleType, final boolean concat,
                      final int maxPhonemes) {
    // RULES is rejected before any field is assigned
    if (ruleType == RuleType.RULES) {
        throw new IllegalArgumentException("ruleType must not be " + RuleType.RULES);
    }
    this.lang = Lang.instance(nameType);
    this.nameType = nameType;
    this.ruleType = ruleType;
    this.concat = concat;
    this.maxPhonemes = maxPhonemes;
}
||||
|
||||
/** |
||||
* Applies the final rules to convert from a language-specific phonetic representation to a |
||||
* language-independent representation. |
||||
* |
||||
* @param phonemeBuilder the current phonemes |
||||
* @param finalRules the final rules to apply |
||||
* @return the resulting phonemes |
||||
*/ |
||||
private PhonemeBuilder applyFinalRules(final PhonemeBuilder phonemeBuilder, |
||||
final Map<String, List<Rule>> finalRules) { |
||||
if (finalRules == null) { |
||||
throw new NullPointerException("finalRules can not be null"); |
||||
} |
||||
if (finalRules.isEmpty()) { |
||||
return phonemeBuilder; |
||||
} |
||||
|
||||
final Map<Rule.Phoneme, Rule.Phoneme> phonemes = |
||||
new TreeMap<Rule.Phoneme, Rule.Phoneme>(Rule.Phoneme.COMPARATOR); |
||||
|
||||
for (final Rule.Phoneme phoneme : phonemeBuilder.getPhonemes()) { |
||||
PhonemeBuilder subBuilder = PhonemeBuilder.empty(phoneme.getLanguages()); |
||||
final String phonemeText = phoneme.getPhonemeText().toString(); |
||||
|
||||
for (int i = 0; i < phonemeText.length();) { |
||||
final RulesApplication rulesApplication = |
||||
new RulesApplication(finalRules, phonemeText, subBuilder, i, maxPhonemes).invoke(); |
||||
final boolean found = rulesApplication.isFound(); |
||||
subBuilder = rulesApplication.getPhonemeBuilder(); |
||||
|
||||
if (!found) { |
||||
// not found, appending as-is
|
||||
subBuilder.append(phonemeText.subSequence(i, i + 1)); |
||||
} |
||||
|
||||
i = rulesApplication.getI(); |
||||
} |
||||
|
||||
// the phonemes map orders the phonemes only based on their text, but ignores the language set
|
||||
// when adding new phonemes, check for equal phonemes and merge their language set, otherwise
|
||||
// phonemes with the same text but different language set get lost
|
||||
for (final Rule.Phoneme newPhoneme : subBuilder.getPhonemes()) { |
||||
if (phonemes.containsKey(newPhoneme)) { |
||||
final Rule.Phoneme oldPhoneme = phonemes.remove(newPhoneme); |
||||
final Rule.Phoneme mergedPhoneme = oldPhoneme.mergeWithLanguage(newPhoneme.getLanguages()); |
||||
phonemes.put(mergedPhoneme, mergedPhoneme); |
||||
} else { |
||||
phonemes.put(newPhoneme, newPhoneme); |
||||
} |
||||
} |
||||
} |
||||
|
||||
return new PhonemeBuilder(phonemes.keySet()); |
||||
} |
||||
|
||||
/** |
||||
* Encodes a string to its phonetic representation. |
||||
* |
||||
* @param input |
||||
* the String to encode |
||||
* @return the encoding of the input |
||||
*/ |
||||
public String encode(final String input) { |
||||
final LanguageSet languageSet = this.lang.guessLanguages(input); |
||||
return encode(input, languageSet); |
||||
} |
||||
|
||||
/** |
||||
* Encodes an input string into an output phonetic representation, given a set of possible origin languages. |
||||
* |
||||
* @param input |
||||
* String to phoneticise; a String with dashes or spaces separating each word |
||||
* @param languageSet |
||||
* set of possible origin languages |
||||
* @return a phonetic representation of the input; a String containing '-'-separated phonetic representations of the |
||||
* input |
||||
*/ |
||||
public String encode(String input, final LanguageSet languageSet) { |
||||
final Map<String, List<Rule>> rules = Rule.getInstanceMap(this.nameType, RuleType.RULES, languageSet); |
||||
// rules common across many (all) languages
|
||||
final Map<String, List<Rule>> finalRules1 = Rule.getInstanceMap(this.nameType, this.ruleType, "common"); |
||||
// rules that apply to a specific language that may be ambiguous or wrong if applied to other languages
|
||||
final Map<String, List<Rule>> finalRules2 = Rule.getInstanceMap(this.nameType, this.ruleType, languageSet); |
||||
|
||||
// tidy the input
|
||||
// lower case is a locale-dependent operation
|
||||
input = input.toLowerCase(Locale.ENGLISH).replace('-', ' ').trim(); |
||||
|
||||
if (this.nameType == NameType.GENERIC) { |
||||
if (input.length() >= 2 && input.substring(0, 2).equals("d'")) { // check for d'
|
||||
final String remainder = input.substring(2); |
||||
final String combined = "d" + remainder; |
||||
return "(" + encode(remainder) + ")-(" + encode(combined) + ")"; |
||||
} |
||||
for (final String l : NAME_PREFIXES.get(this.nameType)) { |
||||
// handle generic prefixes
|
||||
if (input.startsWith(l + " ")) { |
||||
// check for any prefix in the words list
|
||||
final String remainder = input.substring(l.length() + 1); // input without the prefix
|
||||
final String combined = l + remainder; // input with prefix without space
|
||||
return "(" + encode(remainder) + ")-(" + encode(combined) + ")"; |
||||
} |
||||
} |
||||
} |
||||
|
||||
final List<String> words = Arrays.asList(input.split("\\s+")); |
||||
final List<String> words2 = new ArrayList<String>(); |
||||
|
||||
// special-case handling of word prefixes based upon the name type
|
||||
switch (this.nameType) { |
||||
case SEPHARDIC: |
||||
for (final String aWord : words) { |
||||
final String[] parts = aWord.split("'"); |
||||
final String lastPart = parts[parts.length - 1]; |
||||
words2.add(lastPart); |
||||
} |
||||
words2.removeAll(NAME_PREFIXES.get(this.nameType)); |
||||
break; |
||||
case ASHKENAZI: |
||||
words2.addAll(words); |
||||
words2.removeAll(NAME_PREFIXES.get(this.nameType)); |
||||
break; |
||||
case GENERIC: |
||||
words2.addAll(words); |
||||
break; |
||||
default: |
||||
throw new IllegalStateException("Unreachable case: " + this.nameType); |
||||
} |
||||
|
||||
if (this.concat) { |
||||
// concat mode enabled
|
||||
input = join(words2, " "); |
||||
} else if (words2.size() == 1) { |
||||
// not a multi-word name
|
||||
input = words.iterator().next(); |
||||
} else { |
||||
// encode each word in a multi-word name separately (normally used for approx matches)
|
||||
final StringBuilder result = new StringBuilder(); |
||||
for (final String word : words2) { |
||||
result.append("-").append(encode(word)); |
||||
} |
||||
// return the result without the leading "-"
|
||||
return result.substring(1); |
||||
} |
||||
|
||||
PhonemeBuilder phonemeBuilder = PhonemeBuilder.empty(languageSet); |
||||
|
||||
// loop over each char in the input - we will handle the increment manually
|
||||
for (int i = 0; i < input.length();) { |
||||
final RulesApplication rulesApplication = |
||||
new RulesApplication(rules, input, phonemeBuilder, i, maxPhonemes).invoke(); |
||||
i = rulesApplication.getI(); |
||||
phonemeBuilder = rulesApplication.getPhonemeBuilder(); |
||||
} |
||||
|
||||
// Apply the general rules
|
||||
phonemeBuilder = applyFinalRules(phonemeBuilder, finalRules1); |
||||
// Apply the language-specific rules
|
||||
phonemeBuilder = applyFinalRules(phonemeBuilder, finalRules2); |
||||
|
||||
return phonemeBuilder.makeString(); |
||||
} |
||||
|
||||
/** |
||||
* Gets the Lang language guessing rules being used. |
||||
* |
||||
* @return the Lang in use |
||||
*/ |
||||
public Lang getLang() { |
||||
return this.lang; |
||||
} |
||||
|
||||
/** |
||||
* Gets the NameType being used. |
||||
* |
||||
* @return the NameType in use |
||||
*/ |
||||
public NameType getNameType() { |
||||
return this.nameType; |
||||
} |
||||
|
||||
/** |
||||
* Gets the RuleType being used. |
||||
* |
||||
* @return the RuleType in use |
||||
*/ |
||||
public RuleType getRuleType() { |
||||
return this.ruleType; |
||||
} |
||||
|
||||
/** |
||||
* Gets if multiple phonetic encodings are concatenated or if just the first one is kept. |
||||
* |
||||
* @return true if multiple phonetic encodings are returned, false if just the first is |
||||
*/ |
||||
public boolean isConcat() { |
||||
return this.concat; |
||||
} |
||||
|
||||
/** |
||||
* Gets the maximum number of phonemes the engine will calculate for a given input. |
||||
* |
||||
* @return the maximum number of phonemes |
||||
* @since 1.7 |
||||
*/ |
||||
public int getMaxPhonemes() { |
||||
return this.maxPhonemes; |
||||
} |
||||
} |
@ -0,0 +1,37 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.language.bm; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.CharEncoding; |
||||
|
||||
/** |
||||
* Constants used to process resource files. |
||||
* |
||||
* <p>This class is immutable and thread-safe.</p> |
||||
* |
||||
* @since 1.6 |
||||
* @version $Id: ResourceConstants.java 1376690 2012-08-23 20:51:45Z tn $ |
||||
*/ |
||||
class ResourceConstants { |
||||
|
||||
static final String CMT = "//"; |
||||
static final String ENCODING = CharEncoding.UTF_8; |
||||
static final String EXT_CMT_END = "*/"; |
||||
static final String EXT_CMT_START = "/*"; |
||||
|
||||
} |
@ -0,0 +1,728 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.language.bm; |
||||
|
||||
import java.io.InputStream; |
||||
import java.util.ArrayList; |
||||
import java.util.Arrays; |
||||
import java.util.Collections; |
||||
import java.util.Comparator; |
||||
import java.util.EnumMap; |
||||
import java.util.HashMap; |
||||
import java.util.HashSet; |
||||
import java.util.List; |
||||
import java.util.Map; |
||||
import java.util.Scanner; |
||||
import java.util.Set; |
||||
import java.util.regex.Matcher; |
||||
import java.util.regex.Pattern; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.language.bm.Languages.LanguageSet; |
||||
|
||||
/** |
||||
* A phoneme rule. |
||||
* <p> |
||||
* Rules have a pattern, left context, right context, output phoneme, set of languages for which they apply |
||||
* and a logical flag indicating if all languages must be in play. A rule matches if: |
||||
* <ul> |
||||
* <li>the pattern matches at the current position</li> |
||||
* <li>the string up until the beginning of the pattern matches the left context</li> |
||||
* <li>the string from the end of the pattern matches the right context</li> |
||||
* <li>logical is ALL and all languages are in scope; or</li> |
||||
* <li>logical is any other value and at least one language is in scope</li> |
||||
* </ul> |
||||
* <p> |
||||
* Rules are typically generated by parsing rules resources. In normal use, there will be no need for the user |
||||
* to explicitly construct their own. |
||||
* <p> |
||||
* Rules are immutable and thread-safe. |
||||
* <p> |
||||
* <b>Rules resources</b> |
||||
* <p> |
||||
* Rules are typically loaded from resource files. These are UTF-8 encoded text files. They are systematically |
||||
* named following the pattern: |
||||
 * <blockquote>com/fr/third/org/apache/commons/codec/language/bm/${NameType#getName}_${RuleType#getName}_${language}.txt</blockquote>
||||
* <p> |
||||
* The format of these resources is the following: |
||||
* <ul> |
||||
* <li><b>Rules:</b> whitespace separated, double-quoted strings. There should be 4 columns to each row, and these |
||||
* will be interpreted as: |
||||
* <ol> |
||||
* <li>pattern</li> |
||||
* <li>left context</li> |
||||
* <li>right context</li> |
||||
* <li>phoneme</li> |
||||
* </ol> |
||||
* </li> |
||||
* <li><b>End-of-line comments:</b> Any occurrence of '//' will cause all text following on that line to be discarded
|
||||
* as a comment.</li> |
||||
* <li><b>Multi-line comments:</b> Any line starting with '/*' will start multi-line commenting mode. This will skip |
||||
* all content until a line ending in '*' and '/' is found.</li> |
||||
* <li><b>Blank lines:</b> All blank lines will be skipped.</li> |
||||
* </ul> |
||||
* |
||||
* @since 1.6 |
||||
* @version $Id: Rule.java 1760691 2016-09-14 12:14:26Z jochen $ |
||||
*/ |
||||
public class Rule { |
||||
|
||||
public static final class Phoneme implements PhonemeExpr { |
||||
public static final Comparator<Phoneme> COMPARATOR = new Comparator<Phoneme>() { |
||||
@Override |
||||
public int compare(final Phoneme o1, final Phoneme o2) { |
||||
for (int i = 0; i < o1.phonemeText.length(); i++) { |
||||
if (i >= o2.phonemeText.length()) { |
||||
return +1; |
||||
} |
||||
final int c = o1.phonemeText.charAt(i) - o2.phonemeText.charAt(i); |
||||
if (c != 0) { |
||||
return c; |
||||
} |
||||
} |
||||
|
||||
if (o1.phonemeText.length() < o2.phonemeText.length()) { |
||||
return -1; |
||||
} |
||||
|
||||
return 0; |
||||
} |
||||
}; |
||||
|
||||
private final StringBuilder phonemeText; |
||||
private final Languages.LanguageSet languages; |
||||
|
||||
public Phoneme(final CharSequence phonemeText, final Languages.LanguageSet languages) { |
||||
this.phonemeText = new StringBuilder(phonemeText); |
||||
this.languages = languages; |
||||
} |
||||
|
||||
public Phoneme(final Phoneme phonemeLeft, final Phoneme phonemeRight) { |
||||
this(phonemeLeft.phonemeText, phonemeLeft.languages); |
||||
this.phonemeText.append(phonemeRight.phonemeText); |
||||
} |
||||
|
||||
public Phoneme(final Phoneme phonemeLeft, final Phoneme phonemeRight, final Languages.LanguageSet languages) { |
||||
this(phonemeLeft.phonemeText, languages); |
||||
this.phonemeText.append(phonemeRight.phonemeText); |
||||
} |
||||
|
||||
public Phoneme append(final CharSequence str) { |
||||
this.phonemeText.append(str); |
||||
return this; |
||||
} |
||||
|
||||
public Languages.LanguageSet getLanguages() { |
||||
return this.languages; |
||||
} |
||||
|
||||
@Override |
||||
public Iterable<Phoneme> getPhonemes() { |
||||
return Collections.singleton(this); |
||||
} |
||||
|
||||
public CharSequence getPhonemeText() { |
||||
return this.phonemeText; |
||||
} |
||||
|
||||
/** |
||||
* Deprecated since 1.9. |
||||
* |
||||
* @param right the Phoneme to join |
||||
* @return a new Phoneme |
||||
* @deprecated since 1.9 |
||||
*/ |
||||
@Deprecated |
||||
public Phoneme join(final Phoneme right) { |
||||
return new Phoneme(this.phonemeText.toString() + right.phonemeText.toString(), |
||||
this.languages.restrictTo(right.languages)); |
||||
} |
||||
|
||||
/** |
||||
* Returns a new Phoneme with the same text but a union of its |
||||
* current language set and the given one. |
||||
* |
||||
* @param lang the language set to merge |
||||
* @return a new Phoneme |
||||
*/ |
||||
public Phoneme mergeWithLanguage(final LanguageSet lang) { |
||||
return new Phoneme(this.phonemeText.toString(), this.languages.merge(lang)); |
||||
} |
||||
|
||||
@Override |
||||
public String toString() { |
||||
return phonemeText.toString() + "[" + languages + "]"; |
||||
} |
||||
} |
||||
|
||||
public interface PhonemeExpr { |
||||
Iterable<Phoneme> getPhonemes(); |
||||
} |
||||
|
||||
public static final class PhonemeList implements PhonemeExpr { |
||||
private final List<Phoneme> phonemes; |
||||
|
||||
public PhonemeList(final List<Phoneme> phonemes) { |
||||
this.phonemes = phonemes; |
||||
} |
||||
|
||||
@Override |
||||
public List<Phoneme> getPhonemes() { |
||||
return this.phonemes; |
||||
} |
||||
} |
||||
|
||||
/**
 * The small slice of {@code Pattern} functionality that rules rely on, kept behind an interface so that
 * cheaper implementations can stand in for a compiled regular expression.
 */
public interface RPattern {
    /**
     * Tests the given input against this pattern.
     *
     * @param input the text to test
     * @return true if the input matches
     */
    boolean isMatch(CharSequence input);
}
||||
|
||||
public static final RPattern ALL_STRINGS_RMATCHER = new RPattern() { |
||||
@Override |
||||
public boolean isMatch(final CharSequence input) { |
||||
return true; |
||||
} |
||||
}; |
||||
|
||||
public static final String ALL = "ALL"; |
||||
|
||||
private static final String DOUBLE_QUOTE = "\""; |
||||
|
||||
private static final String HASH_INCLUDE = "#include"; |
||||
|
||||
/**
 * All rules loaded at class initialisation: name type, then rule type, then language (or "common"),
 * then the first character of the rule pattern, mapped to the rules in file order.
 */
private static final Map<NameType, Map<RuleType, Map<String, Map<String, List<Rule>>>>> RULES =
        new EnumMap<NameType, Map<RuleType, Map<String, Map<String, List<Rule>>>>>(NameType.class);

// Loads every rules resource for every name type / rule type / language combination.
static {
    for (final NameType nameType : NameType.values()) {
        final Map<RuleType, Map<String, Map<String, List<Rule>>>> byRuleType =
                new EnumMap<RuleType, Map<String, Map<String, List<Rule>>>>(RuleType.class);

        for (final RuleType ruleType : RuleType.values()) {
            final Map<String, Map<String, List<Rule>>> byLanguage =
                    new HashMap<String, Map<String, List<Rule>>>();

            final Languages languages = Languages.getInstance(nameType);
            for (final String language : languages.getLanguages()) {
                final Scanner languageScanner = createScanner(nameType, ruleType, language);
                try {
                    final String resource = createResourceName(nameType, ruleType, language);
                    byLanguage.put(language, parseRules(languageScanner, resource));
                } catch (final IllegalStateException e) {
                    throw new IllegalStateException(
                            "Problem processing " + createResourceName(nameType, ruleType, language), e);
                } finally {
                    languageScanner.close();
                }
            }

            // every rule type except RULES additionally has a "common" resource
            if (ruleType != RuleType.RULES) {
                final Scanner commonScanner = createScanner(nameType, ruleType, "common");
                try {
                    final String resource = createResourceName(nameType, ruleType, "common");
                    byLanguage.put("common", parseRules(commonScanner, resource));
                } finally {
                    commonScanner.close();
                }
            }

            byRuleType.put(ruleType, Collections.unmodifiableMap(byLanguage));
        }

        RULES.put(nameType, Collections.unmodifiableMap(byRuleType));
    }
}
||||
|
||||
private static boolean contains(final CharSequence chars, final char input) { |
||||
for (int i = 0; i < chars.length(); i++) { |
||||
if (chars.charAt(i) == input) { |
||||
return true; |
||||
} |
||||
} |
||||
return false; |
||||
} |
||||
|
||||
private static String createResourceName(final NameType nameType, final RuleType rt, final String lang) { |
||||
return String.format("com/fr/third/org/apache/commons/codec/language/bm/%s_%s_%s.txt", |
||||
nameType.getName(), rt.getName(), lang); |
||||
} |
||||
|
||||
private static Scanner createScanner(final NameType nameType, final RuleType rt, final String lang) { |
||||
final String resName = createResourceName(nameType, rt, lang); |
||||
final InputStream rulesIS = Languages.class.getClassLoader().getResourceAsStream(resName); |
||||
|
||||
if (rulesIS == null) { |
||||
throw new IllegalArgumentException("Unable to load resource: " + resName); |
||||
} |
||||
|
||||
return new Scanner(rulesIS, ResourceConstants.ENCODING); |
||||
} |
||||
|
||||
private static Scanner createScanner(final String lang) { |
||||
final String resName = String.format("com/fr/third/org/apache/commons/codec/language/bm/%s.txt", lang); |
||||
final InputStream rulesIS = Languages.class.getClassLoader().getResourceAsStream(resName); |
||||
|
||||
if (rulesIS == null) { |
||||
throw new IllegalArgumentException("Unable to load resource: " + resName); |
||||
} |
||||
|
||||
return new Scanner(rulesIS, ResourceConstants.ENCODING); |
||||
} |
||||
|
||||
private static boolean endsWith(final CharSequence input, final CharSequence suffix) { |
||||
if (suffix.length() > input.length()) { |
||||
return false; |
||||
} |
||||
for (int i = input.length() - 1, j = suffix.length() - 1; j >= 0; i--, j--) { |
||||
if (input.charAt(i) != suffix.charAt(j)) { |
||||
return false; |
||||
} |
||||
} |
||||
return true; |
||||
} |
||||
|
||||
/** |
||||
* Gets rules for a combination of name type, rule type and languages. |
||||
* |
||||
* @param nameType |
||||
* the NameType to consider |
||||
* @param rt |
||||
* the RuleType to consider |
||||
* @param langs |
||||
* the set of languages to consider |
||||
* @return a list of Rules that apply |
||||
*/ |
||||
public static List<Rule> getInstance(final NameType nameType, final RuleType rt, |
||||
final Languages.LanguageSet langs) { |
||||
final Map<String, List<Rule>> ruleMap = getInstanceMap(nameType, rt, langs); |
||||
final List<Rule> allRules = new ArrayList<Rule>(); |
||||
for (final List<Rule> rules : ruleMap.values()) { |
||||
allRules.addAll(rules); |
||||
} |
||||
return allRules; |
||||
} |
||||
|
||||
/** |
||||
* Gets rules for a combination of name type, rule type and a single language. |
||||
* |
||||
* @param nameType |
||||
* the NameType to consider |
||||
* @param rt |
||||
* the RuleType to consider |
||||
* @param lang |
||||
* the language to consider |
||||
* @return a list of Rules that apply |
||||
*/ |
||||
public static List<Rule> getInstance(final NameType nameType, final RuleType rt, final String lang) { |
||||
return getInstance(nameType, rt, LanguageSet.from(new HashSet<String>(Arrays.asList(lang)))); |
||||
} |
||||
|
||||
/** |
||||
* Gets rules for a combination of name type, rule type and languages. |
||||
* |
||||
* @param nameType |
||||
* the NameType to consider |
||||
* @param rt |
||||
* the RuleType to consider |
||||
* @param langs |
||||
* the set of languages to consider |
||||
* @return a map containing all Rules that apply, grouped by the first character of the rule pattern |
||||
* @since 1.9 |
||||
*/ |
||||
public static Map<String, List<Rule>> getInstanceMap(final NameType nameType, final RuleType rt, |
||||
final Languages.LanguageSet langs) { |
||||
return langs.isSingleton() ? getInstanceMap(nameType, rt, langs.getAny()) : |
||||
getInstanceMap(nameType, rt, Languages.ANY); |
||||
} |
||||
|
||||
/** |
||||
* Gets rules for a combination of name type, rule type and a single language. |
||||
* |
||||
* @param nameType |
||||
* the NameType to consider |
||||
* @param rt |
||||
* the RuleType to consider |
||||
* @param lang |
||||
* the language to consider |
||||
* @return a map containing all Rules that apply, grouped by the first character of the rule pattern |
||||
* @since 1.9 |
||||
*/ |
||||
public static Map<String, List<Rule>> getInstanceMap(final NameType nameType, final RuleType rt, |
||||
final String lang) { |
||||
final Map<String, List<Rule>> rules = RULES.get(nameType).get(rt).get(lang); |
||||
|
||||
if (rules == null) { |
||||
throw new IllegalArgumentException(String.format("No rules found for %s, %s, %s.", |
||||
nameType.getName(), rt.getName(), lang)); |
||||
} |
||||
|
||||
return rules; |
||||
} |
||||
|
||||
private static Phoneme parsePhoneme(final String ph) { |
||||
final int open = ph.indexOf("["); |
||||
if (open >= 0) { |
||||
if (!ph.endsWith("]")) { |
||||
throw new IllegalArgumentException("Phoneme expression contains a '[' but does not end in ']'"); |
||||
} |
||||
final String before = ph.substring(0, open); |
||||
final String in = ph.substring(open + 1, ph.length() - 1); |
||||
final Set<String> langs = new HashSet<String>(Arrays.asList(in.split("[+]"))); |
||||
|
||||
return new Phoneme(before, Languages.LanguageSet.from(langs)); |
||||
} |
||||
return new Phoneme(ph, Languages.ANY_LANGUAGE); |
||||
} |
||||
|
||||
private static PhonemeExpr parsePhonemeExpr(final String ph) { |
||||
if (ph.startsWith("(")) { // we have a bracketed list of options
|
||||
if (!ph.endsWith(")")) { |
||||
throw new IllegalArgumentException("Phoneme starts with '(' so must end with ')'"); |
||||
} |
||||
|
||||
final List<Phoneme> phs = new ArrayList<Phoneme>(); |
||||
final String body = ph.substring(1, ph.length() - 1); |
||||
for (final String part : body.split("[|]")) { |
||||
phs.add(parsePhoneme(part)); |
||||
} |
||||
if (body.startsWith("|") || body.endsWith("|")) { |
||||
phs.add(new Phoneme("", Languages.ANY_LANGUAGE)); |
||||
} |
||||
|
||||
return new PhonemeList(phs); |
||||
} |
||||
return parsePhoneme(ph); |
||||
} |
||||
|
||||
private static Map<String, List<Rule>> parseRules(final Scanner scanner, final String location) { |
||||
final Map<String, List<Rule>> lines = new HashMap<String, List<Rule>>(); |
||||
int currentLine = 0; |
||||
|
||||
boolean inMultilineComment = false; |
||||
while (scanner.hasNextLine()) { |
||||
currentLine++; |
||||
final String rawLine = scanner.nextLine(); |
||||
String line = rawLine; |
||||
|
||||
if (inMultilineComment) { |
||||
if (line.endsWith(ResourceConstants.EXT_CMT_END)) { |
||||
inMultilineComment = false; |
||||
} |
||||
} else { |
||||
if (line.startsWith(ResourceConstants.EXT_CMT_START)) { |
||||
inMultilineComment = true; |
||||
} else { |
||||
// discard comments
|
||||
final int cmtI = line.indexOf(ResourceConstants.CMT); |
||||
if (cmtI >= 0) { |
||||
line = line.substring(0, cmtI); |
||||
} |
||||
|
||||
// trim leading-trailing whitespace
|
||||
line = line.trim(); |
||||
|
||||
if (line.length() == 0) { |
||||
continue; // empty lines can be safely skipped
|
||||
} |
||||
|
||||
if (line.startsWith(HASH_INCLUDE)) { |
||||
// include statement
|
||||
final String incl = line.substring(HASH_INCLUDE.length()).trim(); |
||||
if (incl.contains(" ")) { |
||||
throw new IllegalArgumentException("Malformed import statement '" + rawLine + "' in " + |
||||
location); |
||||
} |
||||
final Scanner hashIncludeScanner = createScanner(incl); |
||||
try { |
||||
lines.putAll(parseRules(hashIncludeScanner, location + "->" + incl)); |
||||
} finally { |
||||
hashIncludeScanner.close(); |
||||
} |
||||
} else { |
||||
// rule
|
||||
final String[] parts = line.split("\\s+"); |
||||
if (parts.length != 4) { |
||||
throw new IllegalArgumentException("Malformed rule statement split into " + parts.length + |
||||
" parts: " + rawLine + " in " + location); |
||||
} |
||||
try { |
||||
final String pat = stripQuotes(parts[0]); |
||||
final String lCon = stripQuotes(parts[1]); |
||||
final String rCon = stripQuotes(parts[2]); |
||||
final PhonemeExpr ph = parsePhonemeExpr(stripQuotes(parts[3])); |
||||
final int cLine = currentLine; |
||||
final Rule r = new Rule(pat, lCon, rCon, ph) { |
||||
private final int myLine = cLine; |
||||
private final String loc = location; |
||||
|
||||
@Override |
||||
public String toString() { |
||||
final StringBuilder sb = new StringBuilder(); |
||||
sb.append("Rule"); |
||||
sb.append("{line=").append(myLine); |
||||
sb.append(", loc='").append(loc).append('\''); |
||||
sb.append(", pat='").append(pat).append('\''); |
||||
sb.append(", lcon='").append(lCon).append('\''); |
||||
sb.append(", rcon='").append(rCon).append('\''); |
||||
sb.append('}'); |
||||
return sb.toString(); |
||||
} |
||||
}; |
||||
final String patternKey = r.pattern.substring(0,1); |
||||
List<Rule> rules = lines.get(patternKey); |
||||
if (rules == null) { |
||||
rules = new ArrayList<Rule>(); |
||||
lines.put(patternKey, rules); |
||||
} |
||||
rules.add(r); |
||||
} catch (final IllegalArgumentException e) { |
||||
throw new IllegalStateException("Problem parsing line '" + currentLine + "' in " + |
||||
location, e); |
||||
} |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
return lines; |
||||
} |
||||
|
||||
/** |
||||
* Attempts to compile the regex into direct string ops, falling back to Pattern and Matcher in the worst case. |
||||
* |
||||
* @param regex |
||||
* the regular expression to compile |
||||
* @return an RPattern that will match this regex |
||||
*/ |
||||
private static RPattern pattern(final String regex) { |
||||
final boolean startsWith = regex.startsWith("^"); |
||||
final boolean endsWith = regex.endsWith("$"); |
||||
final String content = regex.substring(startsWith ? 1 : 0, endsWith ? regex.length() - 1 : regex.length()); |
||||
final boolean boxes = content.contains("["); |
||||
|
||||
if (!boxes) { |
||||
if (startsWith && endsWith) { |
||||
// exact match
|
||||
if (content.length() == 0) { |
||||
// empty
|
||||
return new RPattern() { |
||||
@Override |
||||
public boolean isMatch(final CharSequence input) { |
||||
return input.length() == 0; |
||||
} |
||||
}; |
||||
} |
||||
return new RPattern() { |
||||
@Override |
||||
public boolean isMatch(final CharSequence input) { |
||||
return input.equals(content); |
||||
} |
||||
}; |
||||
} else if ((startsWith || endsWith) && content.length() == 0) { |
||||
// matches every string
|
||||
return ALL_STRINGS_RMATCHER; |
||||
} else if (startsWith) { |
||||
// matches from start
|
||||
return new RPattern() { |
||||
@Override |
||||
public boolean isMatch(final CharSequence input) { |
||||
return startsWith(input, content); |
||||
} |
||||
}; |
||||
} else if (endsWith) { |
||||
// matches from start
|
||||
return new RPattern() { |
||||
@Override |
||||
public boolean isMatch(final CharSequence input) { |
||||
return endsWith(input, content); |
||||
} |
||||
}; |
||||
} |
||||
} else { |
||||
final boolean startsWithBox = content.startsWith("["); |
||||
final boolean endsWithBox = content.endsWith("]"); |
||||
|
||||
if (startsWithBox && endsWithBox) { |
||||
String boxContent = content.substring(1, content.length() - 1); |
||||
if (!boxContent.contains("[")) { |
||||
// box containing alternatives
|
||||
final boolean negate = boxContent.startsWith("^"); |
||||
if (negate) { |
||||
boxContent = boxContent.substring(1); |
||||
} |
||||
final String bContent = boxContent; |
||||
final boolean shouldMatch = !negate; |
||||
|
||||
if (startsWith && endsWith) { |
||||
// exact match
|
||||
return new RPattern() { |
||||
@Override |
||||
public boolean isMatch(final CharSequence input) { |
||||
return input.length() == 1 && contains(bContent, input.charAt(0)) == shouldMatch; |
||||
} |
||||
}; |
||||
} else if (startsWith) { |
||||
// first char
|
||||
return new RPattern() { |
||||
@Override |
||||
public boolean isMatch(final CharSequence input) { |
||||
return input.length() > 0 && contains(bContent, input.charAt(0)) == shouldMatch; |
||||
} |
||||
}; |
||||
} else if (endsWith) { |
||||
// last char
|
||||
return new RPattern() { |
||||
@Override |
||||
public boolean isMatch(final CharSequence input) { |
||||
return input.length() > 0 && |
||||
contains(bContent, input.charAt(input.length() - 1)) == shouldMatch; |
||||
} |
||||
}; |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
return new RPattern() { |
||||
Pattern pattern = Pattern.compile(regex); |
||||
|
||||
@Override |
||||
public boolean isMatch(final CharSequence input) { |
||||
final Matcher matcher = pattern.matcher(input); |
||||
return matcher.find(); |
||||
} |
||||
}; |
||||
} |
||||
|
||||
private static boolean startsWith(final CharSequence input, final CharSequence prefix) { |
||||
if (prefix.length() > input.length()) { |
||||
return false; |
||||
} |
||||
for (int i = 0; i < prefix.length(); i++) { |
||||
if (input.charAt(i) != prefix.charAt(i)) { |
||||
return false; |
||||
} |
||||
} |
||||
return true; |
||||
} |
||||
|
||||
private static String stripQuotes(String str) { |
||||
if (str.startsWith(DOUBLE_QUOTE)) { |
||||
str = str.substring(1); |
||||
} |
||||
|
||||
if (str.endsWith(DOUBLE_QUOTE)) { |
||||
str = str.substring(0, str.length() - 1); |
||||
} |
||||
|
||||
return str; |
||||
} |
||||
|
||||
/** Matcher applied to the input that precedes the pattern (built from the left context plus "$"). */
private final RPattern lContext; |
||||
|
||||
/** Literal text that must occur at the position being tested. */
private final String pattern; |
||||
|
||||
/** Phoneme expression handed out by getPhoneme() for this rule. */
private final PhonemeExpr phoneme; |
||||
|
||||
/** Matcher applied to the input that follows the pattern (built from "^" plus the right context). */
private final RPattern rContext; |
||||
|
||||
/** |
||||
* Creates a new rule. |
||||
* |
||||
* @param pattern |
||||
* the pattern |
||||
* @param lContext |
||||
* the left context |
||||
* @param rContext |
||||
* the right context |
||||
* @param phoneme |
||||
* the resulting phoneme |
||||
*/ |
||||
public Rule(final String pattern, final String lContext, final String rContext, final PhonemeExpr phoneme) { |
||||
this.pattern = pattern; |
||||
this.lContext = pattern(lContext + "$"); |
||||
this.rContext = pattern("^" + rContext); |
||||
this.phoneme = phoneme; |
||||
} |
||||
|
||||
/** |
||||
* Gets the left context. This is a regular expression that must match to the left of the pattern. |
||||
* |
||||
* @return the left context Pattern |
||||
*/ |
||||
public RPattern getLContext() { |
||||
return this.lContext; |
||||
} |
||||
|
||||
/** |
||||
* Gets the pattern. This is a string-literal that must exactly match. |
||||
* |
||||
* @return the pattern |
||||
*/ |
||||
public String getPattern() { |
||||
return this.pattern; |
||||
} |
||||
|
||||
/** |
||||
* Gets the phoneme. If the rule matches, this is the phoneme associated with the pattern match. |
||||
* |
||||
* @return the phoneme |
||||
*/ |
||||
public PhonemeExpr getPhoneme() { |
||||
return this.phoneme; |
||||
} |
||||
|
||||
/** |
||||
* Gets the right context. This is a regular expression that must match to the right of the pattern. |
||||
* |
||||
* @return the right context Pattern |
||||
*/ |
||||
public RPattern getRContext() { |
||||
return this.rContext; |
||||
} |
||||
|
||||
/** |
||||
* Decides if the pattern and context match the input starting at a position. It is a match if the |
||||
* <code>lContext</code> matches <code>input</code> up to <code>i</code>, <code>pattern</code> matches at i and |
||||
* <code>rContext</code> matches from the end of the match of <code>pattern</code> to the end of <code>input</code>. |
||||
* |
||||
* @param input |
||||
* the input String |
||||
* @param i |
||||
* the int position within the input |
||||
* @return true if the pattern and left/right context match, false otherwise |
||||
*/ |
||||
public boolean patternAndContextMatches(final CharSequence input, final int i) { |
||||
if (i < 0) { |
||||
throw new IndexOutOfBoundsException("Can not match pattern at negative indexes"); |
||||
} |
||||
|
||||
final int patternLength = this.pattern.length(); |
||||
final int ipl = i + patternLength; |
||||
|
||||
if (ipl > input.length()) { |
||||
// not enough room for the pattern to match
|
||||
return false; |
||||
} |
||||
|
||||
// evaluate the pattern, left context and right context
|
||||
// fail early if any of the evaluations is not successful
|
||||
if (!input.subSequence(i, ipl).equals(this.pattern)) { |
||||
return false; |
||||
} else if (!this.rContext.isMatch(input.subSequence(ipl, input.length()))) { |
||||
return false; |
||||
} |
||||
return this.lContext.isMatch(input.subSequence(0, i)); |
||||
} |
||||
} |
@ -0,0 +1,50 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.language.bm; |
||||
|
||||
/**
 * The kinds of rule, each carrying a short textual name.
 *
 * @since 1.6
 * @version $Id: RuleType.java 1542813 2013-11-17 20:52:32Z tn $
 */
public enum RuleType {

    /** Approximate rules, which will lead to the largest number of phonetic interpretations. */
    APPROX("approx"),

    /** Exact rules, which will lead to a minimum number of phonetic interpretations. */
    EXACT("exact"),

    /** For internal use only. Please use {@link #APPROX} or {@link #EXACT}. */
    RULES("rules");

    /** Textual name reported by {@link #getName()}. */
    private final String ruleName;

    RuleType(final String ruleName) {
        this.ruleName = ruleName;
    }

    /**
     * Returns the name this rule type was declared with.
     *
     * @return the rule name.
     */
    public String getName() {
        return ruleName;
    }

}
@ -0,0 +1,21 @@
|
||||
<!-- |
||||
Licensed to the Apache Software Foundation (ASF) under one or more |
||||
contributor license agreements. See the NOTICE file distributed with |
||||
this work for additional information regarding copyright ownership. |
||||
The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
(the "License"); you may not use this file except in compliance with |
||||
the License. You may obtain a copy of the License at |
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
||||
Unless required by applicable law or agreed to in writing, software |
||||
distributed under the License is distributed on an "AS IS" BASIS, |
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
See the License for the specific language governing permissions and |
||||
limitations under the License. |
||||
--> |
||||
<html> |
||||
<body> |
||||
Implementation details of the Beider-Morse codec. |
||||
</body> |
||||
</html> |
@ -0,0 +1,21 @@
|
||||
<!-- |
||||
Licensed to the Apache Software Foundation (ASF) under one or more |
||||
contributor license agreements. See the NOTICE file distributed with |
||||
this work for additional information regarding copyright ownership. |
||||
The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
(the "License"); you may not use this file except in compliance with |
||||
the License. You may obtain a copy of the License at |
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
||||
Unless required by applicable law or agreed to in writing, software |
||||
distributed under the License is distributed on an "AS IS" BASIS, |
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
See the License for the specific language governing permissions and |
||||
limitations under the License. |
||||
--> |
||||
<html> |
||||
<body> |
||||
Language and phonetic encoders. |
||||
</body> |
||||
</html> |
@ -0,0 +1,251 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.net; |
||||
|
||||
import java.io.UnsupportedEncodingException; |
||||
import java.nio.charset.Charset; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.binary.Base64; |
||||
import com.fr.third.org.apache.commons.codec.Charsets; |
||||
import com.fr.third.org.apache.commons.codec.DecoderException; |
||||
import com.fr.third.org.apache.commons.codec.EncoderException; |
||||
import com.fr.third.org.apache.commons.codec.StringDecoder; |
||||
import com.fr.third.org.apache.commons.codec.StringEncoder; |
||||
|
||||
/** |
||||
* Identical to the Base64 encoding defined by <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a> |
||||
* and allows a character set to be specified. |
||||
* <p> |
||||
* <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the encoding of non-ASCII |
||||
* text in various portions of a RFC 822 [2] message header, in a manner which is unlikely to confuse existing message |
||||
* handling software. |
||||
* <p> |
||||
* This class is immutable and thread-safe. |
||||
* |
||||
* @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two: Message |
||||
* Header Extensions for Non-ASCII Text</a> |
||||
* |
||||
* @since 1.3 |
||||
* @version $Id: BCodec.java 1429868 2013-01-07 16:08:05Z ggregory $ |
||||
*/ |
||||
public class BCodec extends RFC1522Codec implements StringEncoder, StringDecoder { |
||||
/** |
||||
* The default charset used for string decoding and encoding. |
||||
*/ |
||||
private final Charset charset; |
||||
|
||||
/** |
||||
* Default constructor. |
||||
*/ |
||||
public BCodec() { |
||||
this(Charsets.UTF_8); |
||||
} |
||||
|
||||
/** |
||||
* Constructor which allows for the selection of a default charset |
||||
* |
||||
* @param charset |
||||
* the default string charset to use. |
||||
* |
||||
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> |
||||
* @since 1.7 |
||||
*/ |
||||
public BCodec(final Charset charset) { |
||||
this.charset = charset; |
||||
} |
||||
|
||||
/** |
||||
* Constructor which allows for the selection of a default charset |
||||
* |
||||
* @param charsetName |
||||
* the default charset to use. |
||||
* @throws java.nio.charset.UnsupportedCharsetException |
||||
* If the named charset is unavailable |
||||
* @since 1.7 throws UnsupportedCharsetException if the named charset is unavailable |
||||
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> |
||||
*/ |
||||
public BCodec(final String charsetName) { |
||||
this(Charset.forName(charsetName)); |
||||
} |
||||
|
||||
@Override |
||||
protected String getEncoding() { |
||||
return "B"; |
||||
} |
||||
|
||||
@Override |
||||
protected byte[] doEncoding(final byte[] bytes) { |
||||
if (bytes == null) { |
||||
return null; |
||||
} |
||||
return Base64.encodeBase64(bytes); |
||||
} |
||||
|
||||
@Override |
||||
protected byte[] doDecoding(final byte[] bytes) { |
||||
if (bytes == null) { |
||||
return null; |
||||
} |
||||
return Base64.decodeBase64(bytes); |
||||
} |
||||
|
||||
/** |
||||
* Encodes a string into its Base64 form using the specified charset. Unsafe characters are escaped. |
||||
* |
||||
* @param value |
||||
* string to convert to Base64 form |
||||
* @param charset |
||||
* the charset for <code>value</code> |
||||
* @return Base64 string |
||||
* @throws EncoderException |
||||
* thrown if a failure condition is encountered during the encoding process. |
||||
* @since 1.7 |
||||
*/ |
||||
public String encode(final String value, final Charset charset) throws EncoderException { |
||||
if (value == null) { |
||||
return null; |
||||
} |
||||
return encodeText(value, charset); |
||||
} |
||||
|
||||
/** |
||||
* Encodes a string into its Base64 form using the specified charset. Unsafe characters are escaped. |
||||
* |
||||
* @param value |
||||
* string to convert to Base64 form |
||||
* @param charset |
||||
* the charset for <code>value</code> |
||||
* @return Base64 string |
||||
* @throws EncoderException |
||||
* thrown if a failure condition is encountered during the encoding process. |
||||
*/ |
||||
public String encode(final String value, final String charset) throws EncoderException { |
||||
if (value == null) { |
||||
return null; |
||||
} |
||||
try { |
||||
return this.encodeText(value, charset); |
||||
} catch (final UnsupportedEncodingException e) { |
||||
throw new EncoderException(e.getMessage(), e); |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Encodes a string into its Base64 form using the default charset. Unsafe characters are escaped. |
||||
* |
||||
* @param value |
||||
* string to convert to Base64 form |
||||
* @return Base64 string |
||||
* @throws EncoderException |
||||
* thrown if a failure condition is encountered during the encoding process. |
||||
*/ |
||||
@Override |
||||
public String encode(final String value) throws EncoderException { |
||||
if (value == null) { |
||||
return null; |
||||
} |
||||
return encode(value, this.getCharset()); |
||||
} |
||||
|
||||
/** |
||||
* Decodes a Base64 string into its original form. Escaped characters are converted back to their original |
||||
* representation. |
||||
* |
||||
* @param value |
||||
* Base64 string to convert into its original form |
||||
* @return original string |
||||
* @throws DecoderException |
||||
* A decoder exception is thrown if a failure condition is encountered during the decode process. |
||||
*/ |
||||
@Override |
||||
public String decode(final String value) throws DecoderException { |
||||
if (value == null) { |
||||
return null; |
||||
} |
||||
try { |
||||
return this.decodeText(value); |
||||
} catch (final UnsupportedEncodingException e) { |
||||
throw new DecoderException(e.getMessage(), e); |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Encodes an object into its Base64 form using the default charset. Unsafe characters are escaped. |
||||
* |
||||
* @param value |
||||
* object to convert to Base64 form |
||||
* @return Base64 object |
||||
* @throws EncoderException |
||||
* thrown if a failure condition is encountered during the encoding process. |
||||
*/ |
||||
@Override |
||||
public Object encode(final Object value) throws EncoderException { |
||||
if (value == null) { |
||||
return null; |
||||
} else if (value instanceof String) { |
||||
return encode((String) value); |
||||
} else { |
||||
throw new EncoderException("Objects of type " + |
||||
value.getClass().getName() + |
||||
" cannot be encoded using BCodec"); |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Decodes a Base64 object into its original form. Escaped characters are converted back to their original |
||||
* representation. |
||||
* |
||||
* @param value |
||||
* Base64 object to convert into its original form |
||||
* @return original object |
||||
* @throws DecoderException |
||||
* Thrown if the argument is not a <code>String</code>. Thrown if a failure condition is encountered |
||||
* during the decode process. |
||||
*/ |
||||
@Override |
||||
public Object decode(final Object value) throws DecoderException { |
||||
if (value == null) { |
||||
return null; |
||||
} else if (value instanceof String) { |
||||
return decode((String) value); |
||||
} else { |
||||
throw new DecoderException("Objects of type " + |
||||
value.getClass().getName() + |
||||
" cannot be decoded using BCodec"); |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Gets the default charset name used for string decoding and encoding. |
||||
* |
||||
* @return the default charset name |
||||
* @since 1.7 |
||||
*/ |
||||
public Charset getCharset() { |
||||
return this.charset; |
||||
} |
||||
|
||||
/** |
||||
* Gets the default charset name used for string decoding and encoding. |
||||
* |
||||
* @return the default charset name |
||||
*/ |
||||
public String getDefaultCharset() { |
||||
return this.charset.name(); |
||||
} |
||||
} |
@ -0,0 +1,358 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.net; |
||||
|
||||
import java.io.UnsupportedEncodingException; |
||||
import java.nio.charset.Charset; |
||||
import java.util.BitSet; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.Charsets; |
||||
import com.fr.third.org.apache.commons.codec.DecoderException; |
||||
import com.fr.third.org.apache.commons.codec.EncoderException; |
||||
import com.fr.third.org.apache.commons.codec.StringDecoder; |
||||
import com.fr.third.org.apache.commons.codec.StringEncoder; |
||||
|
||||
/** |
||||
* Similar to the Quoted-Printable content-transfer-encoding defined in |
||||
* <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a> and designed to allow text containing mostly ASCII |
||||
* characters to be decipherable on an ASCII terminal without decoding. |
||||
* <p> |
||||
* <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the encoding of non-ASCII |
||||
* text in various portions of a RFC 822 [2] message header, in a manner which is unlikely to confuse existing message |
||||
* handling software. |
||||
* <p> |
||||
* This class is conditionally thread-safe. |
||||
* The instance field {@link #encodeBlanks} is mutable {@link #setEncodeBlanks(boolean)} |
||||
* but is not volatile, and accesses are not synchronised. |
||||
* If an instance of the class is shared between threads, the caller needs to ensure that suitable synchronisation |
||||
* is used to ensure safe publication of the value between threads, and must not invoke |
||||
* {@link #setEncodeBlanks(boolean)} after initial setup. |
||||
* |
||||
* @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two: Message |
||||
* Header Extensions for Non-ASCII Text</a> |
||||
* |
||||
* @since 1.3 |
||||
* @version $Id: QCodec.java 1619948 2014-08-22 22:53:55Z ggregory $ |
||||
*/ |
||||
public class QCodec extends RFC1522Codec implements StringEncoder, StringDecoder { |
||||
/** |
||||
* The default charset used for string decoding and encoding. |
||||
*/ |
||||
private final Charset charset; |
||||
|
||||
/** |
||||
* BitSet of printable characters as defined in RFC 1522. |
||||
*/ |
||||
private static final BitSet PRINTABLE_CHARS = new BitSet(256); |
||||
// Static initializer for printable chars collection
|
||||
static { |
||||
// alpha characters
|
||||
PRINTABLE_CHARS.set(' '); |
||||
PRINTABLE_CHARS.set('!'); |
||||
PRINTABLE_CHARS.set('"'); |
||||
PRINTABLE_CHARS.set('#'); |
||||
PRINTABLE_CHARS.set('$'); |
||||
PRINTABLE_CHARS.set('%'); |
||||
PRINTABLE_CHARS.set('&'); |
||||
PRINTABLE_CHARS.set('\''); |
||||
PRINTABLE_CHARS.set('('); |
||||
PRINTABLE_CHARS.set(')'); |
||||
PRINTABLE_CHARS.set('*'); |
||||
PRINTABLE_CHARS.set('+'); |
||||
PRINTABLE_CHARS.set(','); |
||||
PRINTABLE_CHARS.set('-'); |
||||
PRINTABLE_CHARS.set('.'); |
||||
PRINTABLE_CHARS.set('/'); |
||||
for (int i = '0'; i <= '9'; i++) { |
||||
PRINTABLE_CHARS.set(i); |
||||
} |
||||
PRINTABLE_CHARS.set(':'); |
||||
PRINTABLE_CHARS.set(';'); |
||||
PRINTABLE_CHARS.set('<'); |
||||
PRINTABLE_CHARS.set('>'); |
||||
PRINTABLE_CHARS.set('@'); |
||||
for (int i = 'A'; i <= 'Z'; i++) { |
||||
PRINTABLE_CHARS.set(i); |
||||
} |
||||
PRINTABLE_CHARS.set('['); |
||||
PRINTABLE_CHARS.set('\\'); |
||||
PRINTABLE_CHARS.set(']'); |
||||
PRINTABLE_CHARS.set('^'); |
||||
PRINTABLE_CHARS.set('`'); |
||||
for (int i = 'a'; i <= 'z'; i++) { |
||||
PRINTABLE_CHARS.set(i); |
||||
} |
||||
PRINTABLE_CHARS.set('{'); |
||||
PRINTABLE_CHARS.set('|'); |
||||
PRINTABLE_CHARS.set('}'); |
||||
PRINTABLE_CHARS.set('~'); |
||||
} |
||||
|
||||
private static final byte BLANK = 32; |
||||
|
||||
private static final byte UNDERSCORE = 95; |
||||
|
||||
private boolean encodeBlanks = false; |
||||
|
||||
/** |
||||
* Default constructor. |
||||
*/ |
||||
public QCodec() { |
||||
this(Charsets.UTF_8); |
||||
} |
||||
|
||||
/** |
||||
* Constructor which allows for the selection of a default charset. |
||||
* |
||||
* @param charset |
||||
* the default string charset to use. |
||||
* |
||||
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> |
||||
* @since 1.7 |
||||
*/ |
||||
public QCodec(final Charset charset) { |
||||
super(); |
||||
this.charset = charset; |
||||
} |
||||
|
||||
/** |
||||
* Constructor which allows for the selection of a default charset. |
||||
* |
||||
* @param charsetName |
||||
* the charset to use. |
||||
* @throws java.nio.charset.UnsupportedCharsetException |
||||
* If the named charset is unavailable |
||||
* @since 1.7 throws UnsupportedCharsetException if the named charset is unavailable |
||||
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> |
||||
*/ |
||||
public QCodec(final String charsetName) { |
||||
this(Charset.forName(charsetName)); |
||||
} |
||||
|
||||
@Override |
||||
protected String getEncoding() { |
||||
return "Q"; |
||||
} |
||||
|
||||
@Override |
||||
protected byte[] doEncoding(final byte[] bytes) { |
||||
if (bytes == null) { |
||||
return null; |
||||
} |
||||
final byte[] data = QuotedPrintableCodec.encodeQuotedPrintable(PRINTABLE_CHARS, bytes); |
||||
if (this.encodeBlanks) { |
||||
for (int i = 0; i < data.length; i++) { |
||||
if (data[i] == BLANK) { |
||||
data[i] = UNDERSCORE; |
||||
} |
||||
} |
||||
} |
||||
return data; |
||||
} |
||||
|
||||
@Override |
||||
protected byte[] doDecoding(final byte[] bytes) throws DecoderException { |
||||
if (bytes == null) { |
||||
return null; |
||||
} |
||||
boolean hasUnderscores = false; |
||||
for (final byte b : bytes) { |
||||
if (b == UNDERSCORE) { |
||||
hasUnderscores = true; |
||||
break; |
||||
} |
||||
} |
||||
if (hasUnderscores) { |
||||
final byte[] tmp = new byte[bytes.length]; |
||||
for (int i = 0; i < bytes.length; i++) { |
||||
final byte b = bytes[i]; |
||||
if (b != UNDERSCORE) { |
||||
tmp[i] = b; |
||||
} else { |
||||
tmp[i] = BLANK; |
||||
} |
||||
} |
||||
return QuotedPrintableCodec.decodeQuotedPrintable(tmp); |
||||
} |
||||
return QuotedPrintableCodec.decodeQuotedPrintable(bytes); |
||||
} |
||||
|
||||
/** |
||||
* Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped. |
||||
* |
||||
* @param str |
||||
* string to convert to quoted-printable form |
||||
* @param charset |
||||
* the charset for str |
||||
* @return quoted-printable string |
||||
* @throws EncoderException |
||||
* thrown if a failure condition is encountered during the encoding process. |
||||
* @since 1.7 |
||||
*/ |
||||
public String encode(final String str, final Charset charset) throws EncoderException { |
||||
if (str == null) { |
||||
return null; |
||||
} |
||||
return encodeText(str, charset); |
||||
} |
||||
|
||||
/** |
||||
* Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped. |
||||
* |
||||
* @param str |
||||
* string to convert to quoted-printable form |
||||
* @param charset |
||||
* the charset for str |
||||
* @return quoted-printable string |
||||
* @throws EncoderException |
||||
* thrown if a failure condition is encountered during the encoding process. |
||||
*/ |
||||
public String encode(final String str, final String charset) throws EncoderException { |
||||
if (str == null) { |
||||
return null; |
||||
} |
||||
try { |
||||
return encodeText(str, charset); |
||||
} catch (final UnsupportedEncodingException e) { |
||||
throw new EncoderException(e.getMessage(), e); |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Encodes a string into its quoted-printable form using the default charset. Unsafe characters are escaped. |
||||
* |
||||
* @param str |
||||
* string to convert to quoted-printable form |
||||
* @return quoted-printable string |
||||
* @throws EncoderException |
||||
* thrown if a failure condition is encountered during the encoding process. |
||||
*/ |
||||
@Override |
||||
public String encode(final String str) throws EncoderException { |
||||
if (str == null) { |
||||
return null; |
||||
} |
||||
return encode(str, getCharset()); |
||||
} |
||||
|
||||
/** |
||||
* Decodes a quoted-printable string into its original form. Escaped characters are converted back to their original |
||||
* representation. |
||||
* |
||||
* @param str |
||||
* quoted-printable string to convert into its original form |
||||
* @return original string |
||||
* @throws DecoderException |
||||
* A decoder exception is thrown if a failure condition is encountered during the decode process. |
||||
*/ |
||||
@Override |
||||
public String decode(final String str) throws DecoderException { |
||||
if (str == null) { |
||||
return null; |
||||
} |
||||
try { |
||||
return decodeText(str); |
||||
} catch (final UnsupportedEncodingException e) { |
||||
throw new DecoderException(e.getMessage(), e); |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Encodes an object into its quoted-printable form using the default charset. Unsafe characters are escaped. |
||||
* |
||||
* @param obj |
||||
* object to convert to quoted-printable form |
||||
* @return quoted-printable object |
||||
* @throws EncoderException |
||||
* thrown if a failure condition is encountered during the encoding process. |
||||
*/ |
||||
@Override |
||||
public Object encode(final Object obj) throws EncoderException { |
||||
if (obj == null) { |
||||
return null; |
||||
} else if (obj instanceof String) { |
||||
return encode((String) obj); |
||||
} else { |
||||
throw new EncoderException("Objects of type " + |
||||
obj.getClass().getName() + |
||||
" cannot be encoded using Q codec"); |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original |
||||
* representation. |
||||
* |
||||
* @param obj |
||||
* quoted-printable object to convert into its original form |
||||
* @return original object |
||||
* @throws DecoderException |
||||
* Thrown if the argument is not a <code>String</code>. Thrown if a failure condition is encountered |
||||
* during the decode process. |
||||
*/ |
||||
@Override |
||||
public Object decode(final Object obj) throws DecoderException { |
||||
if (obj == null) { |
||||
return null; |
||||
} else if (obj instanceof String) { |
||||
return decode((String) obj); |
||||
} else { |
||||
throw new DecoderException("Objects of type " + |
||||
obj.getClass().getName() + |
||||
" cannot be decoded using Q codec"); |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Gets the default charset name used for string decoding and encoding. |
||||
* |
||||
* @return the default charset name |
||||
* @since 1.7 |
||||
*/ |
||||
public Charset getCharset() { |
||||
return this.charset; |
||||
} |
||||
|
||||
/** |
||||
* Gets the default charset name used for string decoding and encoding. |
||||
* |
||||
* @return the default charset name |
||||
*/ |
||||
public String getDefaultCharset() { |
||||
return this.charset.name(); |
||||
} |
||||
|
||||
/** |
||||
* Tests if optional transformation of SPACE characters is to be used |
||||
* |
||||
* @return <code>true</code> if SPACE characters are to be transformed, <code>false</code> otherwise |
||||
*/ |
||||
public boolean isEncodeBlanks() { |
||||
return this.encodeBlanks; |
||||
} |
||||
|
||||
/** |
||||
* Defines whether optional transformation of SPACE characters is to be used |
||||
* |
||||
* @param b |
||||
* <code>true</code> if SPACE characters are to be transformed, <code>false</code> otherwise |
||||
*/ |
||||
public void setEncodeBlanks(final boolean b) { |
||||
this.encodeBlanks = b; |
||||
} |
||||
} |
@ -0,0 +1,601 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.net; |
||||
|
||||
import java.io.ByteArrayOutputStream; |
||||
import java.io.UnsupportedEncodingException; |
||||
import java.nio.charset.Charset; |
||||
import java.nio.charset.IllegalCharsetNameException; |
||||
import java.nio.charset.UnsupportedCharsetException; |
||||
import java.util.BitSet; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.binary.StringUtils; |
||||
import com.fr.third.org.apache.commons.codec.BinaryDecoder; |
||||
import com.fr.third.org.apache.commons.codec.BinaryEncoder; |
||||
import com.fr.third.org.apache.commons.codec.Charsets; |
||||
import com.fr.third.org.apache.commons.codec.DecoderException; |
||||
import com.fr.third.org.apache.commons.codec.EncoderException; |
||||
import com.fr.third.org.apache.commons.codec.StringDecoder; |
||||
import com.fr.third.org.apache.commons.codec.StringEncoder; |
||||
|
||||
/** |
||||
* Codec for the Quoted-Printable section of <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a>. |
||||
* <p> |
||||
* The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to |
||||
* printable characters in the ASCII character set. It encodes the data in such a way that the resulting octets are |
||||
* unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the |
||||
* data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable |
||||
* to ensure the integrity of the data should the message pass through a character- translating, and/or line-wrapping |
||||
* gateway. |
||||
* <p> |
||||
* Note: |
||||
* <p> |
||||
* Depending on the selected {@code strict} parameter, this class will implement a different set of rules of the |
||||
* quoted-printable spec: |
||||
* <ul> |
||||
* <li>{@code strict=false}: only rules #1 and #2 are implemented |
||||
* <li>{@code strict=true}: all rules #1 through #5 are implemented |
||||
* </ul> |
||||
* Originally, this class only supported the non-strict mode, but the codec in this partial form could already be used |
||||
* for certain applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance |
||||
* Q codec. The strict mode has been added in 1.10. |
||||
* <p> |
||||
* This class is immutable and thread-safe. |
||||
* |
||||
* @see <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One: |
||||
* Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a> |
||||
* |
||||
* @since 1.3 |
||||
* @version $Id: QuotedPrintableCodec.java 1788792 2017-03-26 23:57:00Z sebb $ |
||||
*/ |
||||
public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder { |
||||
/** |
||||
* The default charset used for string decoding and encoding. |
||||
*/ |
||||
private final Charset charset; |
||||
|
||||
/** |
||||
* Indicates whether soft line breaks shall be used during encoding (rule #3-5). |
||||
*/ |
||||
private final boolean strict; |
||||
|
||||
/** |
||||
* BitSet of printable characters as defined in RFC 1521. |
||||
*/ |
||||
private static final BitSet PRINTABLE_CHARS = new BitSet(256); |
||||
|
||||
private static final byte ESCAPE_CHAR = '='; |
||||
|
||||
private static final byte TAB = 9; |
||||
|
||||
private static final byte SPACE = 32; |
||||
|
||||
private static final byte CR = 13; |
||||
|
||||
private static final byte LF = 10; |
||||
|
||||
/** |
||||
* Safe line length for quoted printable encoded text. |
||||
*/ |
||||
private static final int SAFE_LENGTH = 73; |
||||
|
||||
// Populates PRINTABLE_CHARS with the octets that may be written without escaping.
static {
    // all octets from 33 ('!') to 126 ('~') except the escape character '=' (61)
    for (int octet = 33; octet <= 126; octet++) {
        if (octet != ESCAPE_CHAR) {
            PRINTABLE_CHARS.set(octet);
        }
    }
    // tab and space are also treated as printable
    PRINTABLE_CHARS.set(TAB);
    PRINTABLE_CHARS.set(SPACE);
}
||||
|
||||
/**
 * Creates a codec that uses {@link Charsets#UTF_8} as its default charset and has strict mode
 * switched off.
 */
public QuotedPrintableCodec() {
    this(Charsets.UTF_8, false);
}
||||
|
||||
/**
 * Creates a codec that uses {@link Charsets#UTF_8} as its default charset and the given strict mode.
 *
 * @param strict
 *            if {@code true}, soft line breaks will be used
 * @since 1.10
 */
public QuotedPrintableCodec(final boolean strict) {
    this(Charsets.UTF_8, strict);
}
||||
|
||||
/**
 * Creates a codec with the given default charset and strict mode switched off.
 *
 * @param charset
 *            the default string charset to use.
 * @since 1.7
 */
public QuotedPrintableCodec(final Charset charset) {
    this(charset, false);
}
||||
|
||||
/**
 * Creates a codec with the given default charset and strict mode. All other constructors delegate
 * to this one.
 *
 * @param charset
 *            the default string charset to use.
 * @param strict
 *            if {@code true}, soft line breaks will be used
 * @since 1.10
 */
public QuotedPrintableCodec(final Charset charset, final boolean strict) {
    this.strict = strict;
    this.charset = charset;
}
||||
|
||||
/**
 * Creates a codec whose default charset is looked up by name via {@link Charset#forName(String)};
 * strict mode is switched off.
 *
 * @param charsetName
 *            the name of the default string charset to use.
 * @throws UnsupportedCharsetException
 *             If no support for the named charset is available
 *             in this instance of the Java virtual machine
 * @throws IllegalArgumentException
 *             If the given charsetName is null
 * @throws IllegalCharsetNameException
 *             If the given charset name is illegal
 *
 * @since 1.7 throws UnsupportedCharsetException if the named charset is unavailable
 */
public QuotedPrintableCodec(final String charsetName)
        throws IllegalCharsetNameException, IllegalArgumentException, UnsupportedCharsetException {
    this(Charset.forName(charsetName), false);
}
||||
|
||||
/** |
||||
* Encodes byte into its quoted-printable representation. |
||||
* |
||||
* @param b |
||||
* byte to encode |
||||
* @param buffer |
||||
* the buffer to write to |
||||
* @return The number of bytes written to the <code>buffer</code> |
||||
*/ |
||||
private static final int encodeQuotedPrintable(final int b, final ByteArrayOutputStream buffer) { |
||||
buffer.write(ESCAPE_CHAR); |
||||
final char hex1 = Utils.hexDigit(b >> 4); |
||||
final char hex2 = Utils.hexDigit(b); |
||||
buffer.write(hex1); |
||||
buffer.write(hex2); |
||||
return 3; |
||||
} |
||||
|
||||
/** |
||||
* Return the byte at position <code>index</code> of the byte array and |
||||
* make sure it is unsigned. |
||||
* |
||||
* @param index |
||||
* position in the array |
||||
* @param bytes |
||||
* the byte array |
||||
* @return the unsigned octet at position <code>index</code> from the array |
||||
*/ |
||||
private static int getUnsignedOctet(final int index, final byte[] bytes) { |
||||
int b = bytes[index]; |
||||
if (b < 0) { |
||||
b = 256 + b; |
||||
} |
||||
return b; |
||||
} |
||||
|
||||
/** |
||||
* Write a byte to the buffer. |
||||
* |
||||
* @param b |
||||
* byte to write |
||||
* @param encode |
||||
* indicates whether the octet shall be encoded |
||||
* @param buffer |
||||
* the buffer to write to |
||||
* @return the number of bytes that have been written to the buffer |
||||
*/ |
||||
private static int encodeByte(final int b, final boolean encode, |
||||
final ByteArrayOutputStream buffer) { |
||||
if (encode) { |
||||
return encodeQuotedPrintable(b, buffer); |
||||
} |
||||
buffer.write(b); |
||||
return 1; |
||||
} |
||||
|
||||
/** |
||||
* Checks whether the given byte is whitespace. |
||||
* |
||||
* @param b |
||||
* byte to be checked |
||||
* @return <code>true</code> if the byte is either a space or tab character |
||||
*/ |
||||
private static boolean isWhitespace(final int b) { |
||||
return b == SPACE || b == TAB; |
||||
} |
||||
|
||||
/** |
||||
* Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped. |
||||
* <p> |
||||
* This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in |
||||
* RFC 1521 and is suitable for encoding binary data and unformatted text. |
||||
* |
||||
* @param printable |
||||
* bitset of characters deemed quoted-printable |
||||
* @param bytes |
||||
* array of bytes to be encoded |
||||
* @return array of bytes containing quoted-printable data |
||||
*/ |
||||
public static final byte[] encodeQuotedPrintable(final BitSet printable, final byte[] bytes) { |
||||
return encodeQuotedPrintable(printable, bytes, false); |
||||
} |
||||
|
||||
/** |
||||
* Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped. |
||||
* <p> |
||||
* Depending on the selection of the {@code strict} parameter, this function either implements the full ruleset |
||||
* or only a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in |
||||
* RFC 1521 and is suitable for encoding binary data and unformatted text. |
||||
* |
||||
* @param printable |
||||
* bitset of characters deemed quoted-printable |
||||
* @param bytes |
||||
* array of bytes to be encoded |
||||
* @param strict |
||||
* if {@code true} the full ruleset is used, otherwise only rule #1 and rule #2 |
||||
* @return array of bytes containing quoted-printable data |
||||
* @since 1.10 |
||||
*/ |
||||
public static final byte[] encodeQuotedPrintable(BitSet printable, final byte[] bytes, final boolean strict) { |
||||
if (bytes == null) { |
||||
return null; |
||||
} |
||||
if (printable == null) { |
||||
printable = PRINTABLE_CHARS; |
||||
} |
||||
final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); |
||||
|
||||
if (strict) { |
||||
int pos = 1; |
||||
// encode up to buffer.length - 3, the last three octets will be treated
|
||||
// separately for simplification of note #3
|
||||
for (int i = 0; i < bytes.length - 3; i++) { |
||||
final int b = getUnsignedOctet(i, bytes); |
||||
if (pos < SAFE_LENGTH) { |
||||
// up to this length it is safe to add any byte, encoded or not
|
||||
pos += encodeByte(b, !printable.get(b), buffer); |
||||
} else { |
||||
// rule #3: whitespace at the end of a line *must* be encoded
|
||||
encodeByte(b, !printable.get(b) || isWhitespace(b), buffer); |
||||
|
||||
// rule #5: soft line break
|
||||
buffer.write(ESCAPE_CHAR); |
||||
buffer.write(CR); |
||||
buffer.write(LF); |
||||
pos = 1; |
||||
} |
||||
} |
||||
|
||||
// rule #3: whitespace at the end of a line *must* be encoded
|
||||
// if we would do a soft break line after this octet, encode whitespace
|
||||
int b = getUnsignedOctet(bytes.length - 3, bytes); |
||||
boolean encode = !printable.get(b) || (isWhitespace(b) && pos > SAFE_LENGTH - 5); |
||||
pos += encodeByte(b, encode, buffer); |
||||
|
||||
// note #3: '=' *must not* be the ultimate or penultimate character
|
||||
// simplification: if < 6 bytes left, do a soft line break as we may need
|
||||
// exactly 6 bytes space for the last 2 bytes
|
||||
if (pos > SAFE_LENGTH - 2) { |
||||
buffer.write(ESCAPE_CHAR); |
||||
buffer.write(CR); |
||||
buffer.write(LF); |
||||
} |
||||
for (int i = bytes.length - 2; i < bytes.length; i++) { |
||||
b = getUnsignedOctet(i, bytes); |
||||
// rule #3: trailing whitespace shall be encoded
|
||||
encode = !printable.get(b) || (i > bytes.length - 2 && isWhitespace(b)); |
||||
encodeByte(b, encode, buffer); |
||||
} |
||||
} else { |
||||
for (final byte c : bytes) { |
||||
int b = c; |
||||
if (b < 0) { |
||||
b = 256 + b; |
||||
} |
||||
if (printable.get(b)) { |
||||
buffer.write(b); |
||||
} else { |
||||
encodeQuotedPrintable(b, buffer); |
||||
} |
||||
} |
||||
} |
||||
return buffer.toByteArray(); |
||||
} |
||||
|
||||
/** |
||||
* Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted |
||||
* back to their original representation. |
||||
* <p> |
||||
* This function fully implements the quoted-printable encoding specification (rule #1 through rule #5) as |
||||
* defined in RFC 1521. |
||||
* |
||||
* @param bytes |
||||
* array of quoted-printable characters |
||||
* @return array of original bytes |
||||
* @throws DecoderException |
||||
* Thrown if quoted-printable decoding is unsuccessful |
||||
*/ |
||||
public static final byte[] decodeQuotedPrintable(final byte[] bytes) throws DecoderException { |
||||
if (bytes == null) { |
||||
return null; |
||||
} |
||||
final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); |
||||
for (int i = 0; i < bytes.length; i++) { |
||||
final int b = bytes[i]; |
||||
if (b == ESCAPE_CHAR) { |
||||
try { |
||||
// if the next octet is a CR we have found a soft line break
|
||||
if (bytes[++i] == CR) { |
||||
continue; |
||||
} |
||||
final int u = Utils.digit16(bytes[i]); |
||||
final int l = Utils.digit16(bytes[++i]); |
||||
buffer.write((char) ((u << 4) + l)); |
||||
} catch (final ArrayIndexOutOfBoundsException e) { |
||||
throw new DecoderException("Invalid quoted-printable encoding", e); |
||||
} |
||||
} else if (b != CR && b != LF) { |
||||
// every other octet is appended except for CR & LF
|
||||
buffer.write(b); |
||||
} |
||||
} |
||||
return buffer.toByteArray(); |
||||
} |
||||
|
||||
/** |
||||
* Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped. |
||||
* <p> |
||||
* Depending on the selection of the {@code strict} parameter, this function either implements the full ruleset |
||||
* or only a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in |
||||
* RFC 1521 and is suitable for encoding binary data and unformatted text. |
||||
* |
||||
* @param bytes |
||||
* array of bytes to be encoded |
||||
* @return array of bytes containing quoted-printable data |
||||
*/ |
||||
@Override |
||||
public byte[] encode(final byte[] bytes) { |
||||
return encodeQuotedPrintable(PRINTABLE_CHARS, bytes, strict); |
||||
} |
||||
|
||||
/** |
||||
* Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted |
||||
* back to their original representation. |
||||
* <p> |
||||
* This function fully implements the quoted-printable encoding specification (rule #1 through rule #5) as |
||||
* defined in RFC 1521. |
||||
* |
||||
* @param bytes |
||||
* array of quoted-printable characters |
||||
* @return array of original bytes |
||||
* @throws DecoderException |
||||
* Thrown if quoted-printable decoding is unsuccessful |
||||
*/ |
||||
@Override |
||||
public byte[] decode(final byte[] bytes) throws DecoderException { |
||||
return decodeQuotedPrintable(bytes); |
||||
} |
||||
|
||||
/** |
||||
* Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped. |
||||
* <p> |
||||
* Depending on the selection of the {@code strict} parameter, this function either implements the full ruleset |
||||
* or only a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in |
||||
* RFC 1521 and is suitable for encoding binary data and unformatted text. |
||||
* |
||||
* @param str |
||||
* string to convert to quoted-printable form |
||||
* @return quoted-printable string |
||||
* @throws EncoderException |
||||
* Thrown if quoted-printable encoding is unsuccessful |
||||
* |
||||
* @see #getCharset() |
||||
*/ |
||||
@Override |
||||
public String encode(final String str) throws EncoderException { |
||||
return this.encode(str, getCharset()); |
||||
} |
||||
|
||||
/** |
||||
* Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters |
||||
* are converted back to their original representation. |
||||
* |
||||
* @param str |
||||
* quoted-printable string to convert into its original form |
||||
* @param charset |
||||
* the original string charset |
||||
* @return original string |
||||
* @throws DecoderException |
||||
* Thrown if quoted-printable decoding is unsuccessful |
||||
* @since 1.7 |
||||
*/ |
||||
public String decode(final String str, final Charset charset) throws DecoderException { |
||||
if (str == null) { |
||||
return null; |
||||
} |
||||
return new String(this.decode(StringUtils.getBytesUsAscii(str)), charset); |
||||
} |
||||
|
||||
/** |
||||
* Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters |
||||
* are converted back to their original representation. |
||||
* |
||||
* @param str |
||||
* quoted-printable string to convert into its original form |
||||
* @param charset |
||||
* the original string charset |
||||
* @return original string |
||||
* @throws DecoderException |
||||
* Thrown if quoted-printable decoding is unsuccessful |
||||
* @throws UnsupportedEncodingException |
||||
* Thrown if charset is not supported |
||||
*/ |
||||
public String decode(final String str, final String charset) throws DecoderException, UnsupportedEncodingException { |
||||
if (str == null) { |
||||
return null; |
||||
} |
||||
return new String(decode(StringUtils.getBytesUsAscii(str)), charset); |
||||
} |
||||
|
||||
/** |
||||
* Decodes a quoted-printable string into its original form using the default string charset. Escaped characters are |
||||
* converted back to their original representation. |
||||
* |
||||
* @param str |
||||
* quoted-printable string to convert into its original form |
||||
* @return original string |
||||
* @throws DecoderException |
||||
* Thrown if quoted-printable decoding is unsuccessful. Thrown if charset is not supported. |
||||
* @see #getCharset() |
||||
*/ |
||||
@Override |
||||
public String decode(final String str) throws DecoderException { |
||||
return this.decode(str, this.getCharset()); |
||||
} |
||||
|
||||
/** |
||||
* Encodes an object into its quoted-printable safe form. Unsafe characters are escaped. |
||||
* |
||||
* @param obj |
||||
* string to convert to a quoted-printable form |
||||
* @return quoted-printable object |
||||
* @throws EncoderException |
||||
* Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is |
||||
* unsuccessful |
||||
*/ |
||||
@Override |
||||
public Object encode(final Object obj) throws EncoderException { |
||||
if (obj == null) { |
||||
return null; |
||||
} else if (obj instanceof byte[]) { |
||||
return encode((byte[]) obj); |
||||
} else if (obj instanceof String) { |
||||
return encode((String) obj); |
||||
} else { |
||||
throw new EncoderException("Objects of type " + |
||||
obj.getClass().getName() + |
||||
" cannot be quoted-printable encoded"); |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original |
||||
* representation. |
||||
* |
||||
* @param obj |
||||
* quoted-printable object to convert into its original form |
||||
* @return original object |
||||
* @throws DecoderException |
||||
* Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure |
||||
* condition is encountered during the decode process. |
||||
*/ |
||||
@Override |
||||
public Object decode(final Object obj) throws DecoderException { |
||||
if (obj == null) { |
||||
return null; |
||||
} else if (obj instanceof byte[]) { |
||||
return decode((byte[]) obj); |
||||
} else if (obj instanceof String) { |
||||
return decode((String) obj); |
||||
} else { |
||||
throw new DecoderException("Objects of type " + |
||||
obj.getClass().getName() + |
||||
" cannot be quoted-printable decoded"); |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Gets the default charset name used for string decoding and encoding. |
||||
* |
||||
* @return the default charset name |
||||
* @since 1.7 |
||||
*/ |
||||
public Charset getCharset() { |
||||
return this.charset; |
||||
} |
||||
|
||||
/** |
||||
* Gets the default charset name used for string decoding and encoding. |
||||
* |
||||
* @return the default charset name |
||||
*/ |
||||
public String getDefaultCharset() { |
||||
return this.charset.name(); |
||||
} |
||||
|
||||
/** |
||||
* Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped. |
||||
* <p> |
||||
* Depending on the selection of the {@code strict} parameter, this function either implements the full ruleset |
||||
* or only a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in |
||||
* RFC 1521 and is suitable for encoding binary data and unformatted text. |
||||
* |
||||
* @param str |
||||
* string to convert to quoted-printable form |
||||
* @param charset |
||||
* the charset for str |
||||
* @return quoted-printable string |
||||
* @since 1.7 |
||||
*/ |
||||
public String encode(final String str, final Charset charset) { |
||||
if (str == null) { |
||||
return null; |
||||
} |
||||
return StringUtils.newStringUsAscii(this.encode(str.getBytes(charset))); |
||||
} |
||||
|
||||
/** |
||||
* Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped. |
||||
* <p> |
||||
* Depending on the selection of the {@code strict} parameter, this function either implements the full ruleset |
||||
* or only a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in |
||||
* RFC 1521 and is suitable for encoding binary data and unformatted text. |
||||
* |
||||
* @param str |
||||
* string to convert to quoted-printable form |
||||
* @param charset |
||||
* the charset for str |
||||
* @return quoted-printable string |
||||
* @throws UnsupportedEncodingException |
||||
* Thrown if the charset is not supported |
||||
*/ |
||||
public String encode(final String str, final String charset) throws UnsupportedEncodingException { |
||||
if (str == null) { |
||||
return null; |
||||
} |
||||
return StringUtils.newStringUsAscii(encode(str.getBytes(charset))); |
||||
} |
||||
} |
@ -0,0 +1,186 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.net; |
||||
|
||||
import java.io.UnsupportedEncodingException; |
||||
import java.nio.charset.Charset; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.binary.StringUtils; |
||||
import com.fr.third.org.apache.commons.codec.DecoderException; |
||||
import com.fr.third.org.apache.commons.codec.EncoderException; |
||||
|
||||
/** |
||||
* Implements methods common to all codecs defined in RFC 1522. |
||||
* <p> |
||||
* <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the |
||||
* encoding of non-ASCII text in various portions of a RFC 822 [2] message header, in a manner which |
||||
* is unlikely to confuse existing message handling software. |
||||
* <p> |
||||
* This class is immutable and thread-safe. |
||||
* |
||||
* @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two: |
||||
* Message Header Extensions for Non-ASCII Text</a> |
||||
* |
||||
* @since 1.3 |
||||
* @version $Id: RFC1522Codec.java 1619948 2014-08-22 22:53:55Z ggregory $ |
||||
*/ |
||||
abstract class RFC1522Codec { |
||||
|
||||
/** Separator. */ |
||||
protected static final char SEP = '?'; |
||||
|
||||
/** Postfix that terminates an encoded-word. */ |
||||
protected static final String POSTFIX = "?="; |
||||
|
||||
/** Prefix that introduces an encoded-word. */ |
||||
protected static final String PREFIX = "=?"; |
||||
|
||||
/** |
||||
* Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset. |
||||
* <p> |
||||
* This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes |
||||
* {@link #doEncoding(byte [])} method of a concrete class to perform the specific encoding. |
||||
* |
||||
* @param text |
||||
* a string to encode |
||||
* @param charset |
||||
* a charset to be used |
||||
* @return RFC 1522 compliant "encoded-word" |
||||
* @throws EncoderException |
||||
* thrown if there is an error condition during the Encoding process. |
||||
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> |
||||
*/ |
||||
protected String encodeText(final String text, final Charset charset) throws EncoderException { |
||||
if (text == null) { |
||||
return null; |
||||
} |
||||
final StringBuilder buffer = new StringBuilder(); |
||||
buffer.append(PREFIX); |
||||
buffer.append(charset); |
||||
buffer.append(SEP); |
||||
buffer.append(this.getEncoding()); |
||||
buffer.append(SEP); |
||||
final byte [] rawData = this.doEncoding(text.getBytes(charset)); |
||||
buffer.append(StringUtils.newStringUsAscii(rawData)); |
||||
buffer.append(POSTFIX); |
||||
return buffer.toString(); |
||||
} |
||||
|
||||
/** |
||||
* Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset. |
||||
* <p> |
||||
* This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes |
||||
* {@link #doEncoding(byte [])} method of a concrete class to perform the specific encoding. |
||||
* |
||||
* @param text |
||||
* a string to encode |
||||
* @param charsetName |
||||
* the charset to use |
||||
* @return RFC 1522 compliant "encoded-word" |
||||
* @throws EncoderException |
||||
* thrown if there is an error condition during the Encoding process. |
||||
* @throws UnsupportedEncodingException |
||||
* if charset is not available |
||||
* |
||||
* @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> |
||||
*/ |
||||
protected String encodeText(final String text, final String charsetName) |
||||
throws EncoderException, UnsupportedEncodingException { |
||||
if (text == null) { |
||||
return null; |
||||
} |
||||
return this.encodeText(text, Charset.forName(charsetName)); |
||||
} |
||||
|
||||
/** |
||||
* Applies an RFC 1522 compliant decoding scheme to the given string of text. |
||||
* <p> |
||||
* This method processes the "encoded-word" header common to all the RFC 1522 codecs and then invokes |
||||
* {@link #doEncoding(byte [])} method of a concrete class to perform the specific decoding. |
||||
* |
||||
* @param text |
||||
* a string to decode |
||||
* @return A new decoded String or <code>null</code> if the input is <code>null</code>. |
||||
* @throws DecoderException |
||||
* thrown if there is an error condition during the decoding process. |
||||
* @throws UnsupportedEncodingException |
||||
* thrown if charset specified in the "encoded-word" header is not supported |
||||
*/ |
||||
protected String decodeText(final String text) |
||||
throws DecoderException, UnsupportedEncodingException { |
||||
if (text == null) { |
||||
return null; |
||||
} |
||||
if (!text.startsWith(PREFIX) || !text.endsWith(POSTFIX)) { |
||||
throw new DecoderException("RFC 1522 violation: malformed encoded content"); |
||||
} |
||||
final int terminator = text.length() - 2; |
||||
int from = 2; |
||||
int to = text.indexOf(SEP, from); |
||||
if (to == terminator) { |
||||
throw new DecoderException("RFC 1522 violation: charset token not found"); |
||||
} |
||||
final String charset = text.substring(from, to); |
||||
if (charset.equals("")) { |
||||
throw new DecoderException("RFC 1522 violation: charset not specified"); |
||||
} |
||||
from = to + 1; |
||||
to = text.indexOf(SEP, from); |
||||
if (to == terminator) { |
||||
throw new DecoderException("RFC 1522 violation: encoding token not found"); |
||||
} |
||||
final String encoding = text.substring(from, to); |
||||
if (!getEncoding().equalsIgnoreCase(encoding)) { |
||||
throw new DecoderException("This codec cannot decode " + encoding + " encoded content"); |
||||
} |
||||
from = to + 1; |
||||
to = text.indexOf(SEP, from); |
||||
byte[] data = StringUtils.getBytesUsAscii(text.substring(from, to)); |
||||
data = doDecoding(data); |
||||
return new String(data, charset); |
||||
} |
||||
|
||||
/** |
||||
* Returns the codec name (referred to as encoding in the RFC 1522). |
||||
* |
||||
* @return name of the codec |
||||
*/ |
||||
protected abstract String getEncoding(); |
||||
|
||||
/** |
||||
* Encodes an array of bytes using the defined encoding scheme. |
||||
* |
||||
* @param bytes |
||||
* Data to be encoded |
||||
* @return A byte array containing the encoded data |
||||
* @throws EncoderException |
||||
* thrown if the Encoder encounters a failure condition during the encoding process. |
||||
*/ |
||||
protected abstract byte[] doEncoding(byte[] bytes) throws EncoderException; |
||||
|
||||
/** |
||||
* Decodes an array of bytes using the defined encoding scheme. |
||||
* |
||||
* @param bytes |
||||
* Data to be decoded |
||||
* @return a byte array that contains decoded data |
||||
* @throws DecoderException |
||||
* A decoder exception is thrown if a Decoder encounters a failure condition during the decode process. |
||||
*/ |
||||
protected abstract byte[] doDecoding(byte[] bytes) throws DecoderException; |
||||
} |
@ -0,0 +1,373 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.net; |
||||
|
||||
import java.io.ByteArrayOutputStream; |
||||
import java.io.UnsupportedEncodingException; |
||||
import java.util.BitSet; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.binary.StringUtils; |
||||
import com.fr.third.org.apache.commons.codec.BinaryDecoder; |
||||
import com.fr.third.org.apache.commons.codec.BinaryEncoder; |
||||
import com.fr.third.org.apache.commons.codec.CharEncoding; |
||||
import com.fr.third.org.apache.commons.codec.DecoderException; |
||||
import com.fr.third.org.apache.commons.codec.EncoderException; |
||||
import com.fr.third.org.apache.commons.codec.StringDecoder; |
||||
import com.fr.third.org.apache.commons.codec.StringEncoder; |
||||
|
||||
/** |
||||
* Implements the 'www-form-urlencoded' encoding scheme, also misleadingly known as URL encoding. |
||||
* <p> |
||||
* This codec is meant to be a replacement for standard Java classes {@link java.net.URLEncoder} and |
||||
* {@link java.net.URLDecoder} on older Java platforms, as these classes in Java versions below |
||||
* 1.4 rely on the platform's default charset encoding. |
||||
* <p> |
||||
* This class is thread-safe since 1.11 |
||||
* |
||||
* @see <a href="http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1">Chapter 17.13.4 Form content types</a> |
||||
* of the <a href="http://www.w3.org/TR/html4/">HTML 4.01 Specification</a> |
||||
* |
||||
* @since 1.2 |
||||
* @version $Id: URLCodec.java 1789142 2017-03-28 13:58:58Z sebb $ |
||||
*/ |
||||
public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder { |
||||
|
||||
/** |
||||
* The default charset used for string decoding and encoding. |
||||
* |
||||
* @deprecated TODO: This field will be changed to a private final Charset in 2.0. (CODEC-126) |
||||
*/ |
||||
@Deprecated |
||||
protected volatile String charset; // added volatile: see CODEC-232
|
||||
|
||||
/** |
||||
* Release 1.5 made this field final. |
||||
*/ |
||||
protected static final byte ESCAPE_CHAR = '%'; |
||||
|
||||
/** |
||||
* BitSet of www-form-url safe characters. |
||||
* This is a copy of the internal BitSet which is now used for the conversion. |
||||
* Changes to this field are ignored. |
||||
* @deprecated 1.11 Will be removed in 2.0 (CODEC-230) |
||||
*/ |
||||
@Deprecated |
||||
protected static final BitSet WWW_FORM_URL; |
||||
|
||||
private static final BitSet WWW_FORM_URL_SAFE = new BitSet(256); |
||||
|
||||
// Populates WWW_FORM_URL_SAFE and publishes a copy of it as WWW_FORM_URL.
static {
    // ASCII letters, lower and upper case
    for (int offset = 0; offset < 26; offset++) {
        WWW_FORM_URL_SAFE.set('a' + offset);
        WWW_FORM_URL_SAFE.set('A' + offset);
    }
    // decimal digits
    for (int digit = 0; digit < 10; digit++) {
        WWW_FORM_URL_SAFE.set('0' + digit);
    }
    // special characters; the blank is in the set because it is replaced with '+' rather than escaped
    for (final char special : new char[] {'-', '_', '.', '*', ' '}) {
        WWW_FORM_URL_SAFE.set(special);
    }

    // Create a copy in case anyone (ab)uses it
    WWW_FORM_URL = (BitSet) WWW_FORM_URL_SAFE.clone();
}
||||
|
||||
|
||||
/**
 * Default constructor; uses {@link CharEncoding#UTF_8} as the default charset.
 */
public URLCodec() {
    this(CharEncoding.UTF_8);
}
||||
|
||||
/**
 * Constructor which allows for the selection of a default charset. The name is stored as given.
 *
 * @param charset the name of the default string charset to use.
 */
public URLCodec(final String charset) {
    this.charset = charset;
}
||||
|
||||
/** |
||||
* Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped. |
||||
* |
||||
* @param urlsafe |
||||
* bitset of characters deemed URL safe |
||||
* @param bytes |
||||
* array of bytes to convert to URL safe characters |
||||
* @return array of bytes containing URL safe characters |
||||
*/ |
||||
public static final byte[] encodeUrl(BitSet urlsafe, final byte[] bytes) { |
||||
if (bytes == null) { |
||||
return null; |
||||
} |
||||
if (urlsafe == null) { |
||||
urlsafe = WWW_FORM_URL_SAFE; |
||||
} |
||||
|
||||
final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); |
||||
for (final byte c : bytes) { |
||||
int b = c; |
||||
if (b < 0) { |
||||
b = 256 + b; |
||||
} |
||||
if (urlsafe.get(b)) { |
||||
if (b == ' ') { |
||||
b = '+'; |
||||
} |
||||
buffer.write(b); |
||||
} else { |
||||
buffer.write(ESCAPE_CHAR); |
||||
final char hex1 = Utils.hexDigit(b >> 4); |
||||
final char hex2 = Utils.hexDigit(b); |
||||
buffer.write(hex1); |
||||
buffer.write(hex2); |
||||
} |
||||
} |
||||
return buffer.toByteArray(); |
||||
} |
||||
|
||||
/** |
||||
* Decodes an array of URL safe 7-bit characters into an array of original bytes. Escaped characters are converted |
||||
* back to their original representation. |
||||
* |
||||
* @param bytes |
||||
* array of URL safe characters |
||||
* @return array of original bytes |
||||
* @throws DecoderException |
||||
* Thrown if URL decoding is unsuccessful |
||||
*/ |
||||
public static final byte[] decodeUrl(final byte[] bytes) throws DecoderException { |
||||
if (bytes == null) { |
||||
return null; |
||||
} |
||||
final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); |
||||
for (int i = 0; i < bytes.length; i++) { |
||||
final int b = bytes[i]; |
||||
if (b == '+') { |
||||
buffer.write(' '); |
||||
} else if (b == ESCAPE_CHAR) { |
||||
try { |
||||
final int u = Utils.digit16(bytes[++i]); |
||||
final int l = Utils.digit16(bytes[++i]); |
||||
buffer.write((char) ((u << 4) + l)); |
||||
} catch (final ArrayIndexOutOfBoundsException e) { |
||||
throw new DecoderException("Invalid URL encoding: ", e); |
||||
} |
||||
} else { |
||||
buffer.write(b); |
||||
} |
||||
} |
||||
return buffer.toByteArray(); |
||||
} |
||||
|
||||
/** |
||||
* Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped. |
||||
* |
||||
* @param bytes |
||||
* array of bytes to convert to URL safe characters |
||||
* @return array of bytes containing URL safe characters |
||||
*/ |
||||
@Override |
||||
public byte[] encode(final byte[] bytes) { |
||||
return encodeUrl(WWW_FORM_URL_SAFE, bytes); |
||||
} |
||||
|
||||
|
||||
/** |
||||
* Decodes an array of URL safe 7-bit characters into an array of original bytes. Escaped characters are converted |
||||
* back to their original representation. |
||||
* |
||||
* @param bytes |
||||
* array of URL safe characters |
||||
* @return array of original bytes |
||||
* @throws DecoderException |
||||
* Thrown if URL decoding is unsuccessful |
||||
*/ |
||||
@Override |
||||
public byte[] decode(final byte[] bytes) throws DecoderException { |
||||
return decodeUrl(bytes); |
||||
} |
||||
|
||||
/** |
||||
* Encodes a string into its URL safe form using the specified string charset. Unsafe characters are escaped. |
||||
* |
||||
* @param str |
||||
* string to convert to a URL safe form |
||||
* @param charset |
||||
* the charset for str |
||||
* @return URL safe string |
||||
* @throws UnsupportedEncodingException |
||||
* Thrown if charset is not supported |
||||
*/ |
||||
public String encode(final String str, final String charset) throws UnsupportedEncodingException { |
||||
if (str == null) { |
||||
return null; |
||||
} |
||||
return StringUtils.newStringUsAscii(encode(str.getBytes(charset))); |
||||
} |
||||
|
||||
/** |
||||
* Encodes a string into its URL safe form using the default string charset. Unsafe characters are escaped. |
||||
* |
||||
* @param str |
||||
* string to convert to a URL safe form |
||||
* @return URL safe string |
||||
* @throws EncoderException |
||||
* Thrown if URL encoding is unsuccessful |
||||
* |
||||
* @see #getDefaultCharset() |
||||
*/ |
||||
@Override |
||||
public String encode(final String str) throws EncoderException { |
||||
if (str == null) { |
||||
return null; |
||||
} |
||||
try { |
||||
return encode(str, getDefaultCharset()); |
||||
} catch (final UnsupportedEncodingException e) { |
||||
throw new EncoderException(e.getMessage(), e); |
||||
} |
||||
} |
||||
|
||||
|
||||
/** |
||||
* Decodes a URL safe string into its original form using the specified encoding. Escaped characters are converted |
||||
* back to their original representation. |
||||
* |
||||
* @param str |
||||
* URL safe string to convert into its original form |
||||
* @param charset |
||||
* the original string charset |
||||
* @return original string |
||||
* @throws DecoderException |
||||
* Thrown if URL decoding is unsuccessful |
||||
* @throws UnsupportedEncodingException |
||||
* Thrown if charset is not supported |
||||
*/ |
||||
public String decode(final String str, final String charset) throws DecoderException, UnsupportedEncodingException { |
||||
if (str == null) { |
||||
return null; |
||||
} |
||||
return new String(decode(StringUtils.getBytesUsAscii(str)), charset); |
||||
} |
||||
|
||||
/** |
||||
* Decodes a URL safe string into its original form using the default string charset. Escaped characters are |
||||
* converted back to their original representation. |
||||
* |
||||
* @param str |
||||
* URL safe string to convert into its original form |
||||
* @return original string |
||||
* @throws DecoderException |
||||
* Thrown if URL decoding is unsuccessful |
||||
* @see #getDefaultCharset() |
||||
*/ |
||||
@Override |
||||
public String decode(final String str) throws DecoderException { |
||||
if (str == null) { |
||||
return null; |
||||
} |
||||
try { |
||||
return decode(str, getDefaultCharset()); |
||||
} catch (final UnsupportedEncodingException e) { |
||||
throw new DecoderException(e.getMessage(), e); |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Encodes an object into its URL safe form. Unsafe characters are escaped. |
||||
* |
||||
* @param obj |
||||
* string to convert to a URL safe form |
||||
* @return URL safe object |
||||
* @throws EncoderException |
||||
* Thrown if URL encoding is not applicable to objects of this type or if encoding is unsuccessful |
||||
*/ |
||||
@Override |
||||
public Object encode(final Object obj) throws EncoderException { |
||||
if (obj == null) { |
||||
return null; |
||||
} else if (obj instanceof byte[]) { |
||||
return encode((byte[])obj); |
||||
} else if (obj instanceof String) { |
||||
return encode((String)obj); |
||||
} else { |
||||
throw new EncoderException("Objects of type " + obj.getClass().getName() + " cannot be URL encoded"); |
||||
|
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Decodes a URL safe object into its original form. Escaped characters are converted back to their original |
||||
* representation. |
||||
* |
||||
* @param obj |
||||
* URL safe object to convert into its original form |
||||
* @return original object |
||||
* @throws DecoderException |
||||
* Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure |
||||
* condition is encountered during the decode process. |
||||
*/ |
||||
@Override |
||||
public Object decode(final Object obj) throws DecoderException { |
||||
if (obj == null) { |
||||
return null; |
||||
} else if (obj instanceof byte[]) { |
||||
return decode((byte[]) obj); |
||||
} else if (obj instanceof String) { |
||||
return decode((String) obj); |
||||
} else { |
||||
throw new DecoderException("Objects of type " + obj.getClass().getName() + " cannot be URL decoded"); |
||||
|
||||
} |
||||
} |
||||
|
||||
/** |
||||
* The default charset used for string decoding and encoding. |
||||
* |
||||
* @return the default string charset. |
||||
*/ |
||||
public String getDefaultCharset() { |
||||
return this.charset; |
||||
} |
||||
|
||||
/** |
||||
* The <code>String</code> encoding used for decoding and encoding. |
||||
* |
||||
* @return Returns the encoding. |
||||
* |
||||
* @deprecated Use {@link #getDefaultCharset()}, will be removed in 2.0. |
||||
*/ |
||||
@Deprecated |
||||
public String getEncoding() { |
||||
return this.charset; |
||||
} |
||||
|
||||
} |
@ -0,0 +1,65 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package com.fr.third.org.apache.commons.codec.net; |
||||
|
||||
import com.fr.third.org.apache.commons.codec.DecoderException; |
||||
|
||||
/** |
||||
* Utility methods for this package. |
||||
* |
||||
* <p>This class is immutable and thread-safe.</p> |
||||
* |
||||
* @version $Id: Utils.java 1811344 2017-10-06 15:19:57Z ggregory $ |
||||
* @since 1.4 |
||||
*/ |
||||
class Utils { |
||||
|
||||
/** |
||||
* Radix used in encoding and decoding. |
||||
*/ |
||||
private static final int RADIX = 16; |
||||
|
||||
/** |
||||
* Returns the numeric value of the character <code>b</code> in radix 16. |
||||
* |
||||
* @param b |
||||
* The byte to be converted. |
||||
* @return The numeric value represented by the character in radix 16. |
||||
* |
||||
* @throws DecoderException |
||||
* Thrown when the byte is not valid per {@link Character#digit(char,int)} |
||||
*/ |
||||
static int digit16(final byte b) throws DecoderException { |
||||
final int i = Character.digit((char) b, RADIX); |
||||
if (i == -1) { |
||||
throw new DecoderException("Invalid URL encoding: not a valid digit (radix " + RADIX + "): " + b); |
||||
} |
||||
return i; |
||||
} |
||||
|
||||
/** |
||||
* Returns the upper case hex digit of the lower 4 bits of the int. |
||||
* |
||||
* @param b the input int |
||||
* @return the upper case hex digit of the lower 4 bits of the int. |
||||
*/ |
||||
static char hexDigit(final int b) { |
||||
return Character.toUpperCase(Character.forDigit(b & 0xF, RADIX)); |
||||
} |
||||
|
||||
} |
@ -0,0 +1,23 @@
|
||||
<!-- |
||||
Licensed to the Apache Software Foundation (ASF) under one or more |
||||
contributor license agreements. See the NOTICE file distributed with |
||||
this work for additional information regarding copyright ownership. |
||||
The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
(the "License"); you may not use this file except in compliance with |
||||
the License. You may obtain a copy of the License at |
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
||||
Unless required by applicable law or agreed to in writing, software |
||||
distributed under the License is distributed on an "AS IS" BASIS, |
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
See the License for the specific language governing permissions and |
||||
limitations under the License. |
||||
--> |
||||
<html> |
||||
<body> |
||||
<p> |
||||
Network related encoding and decoding. |
||||
</p> |
||||
</body> |
||||
</html> |
@ -0,0 +1,100 @@
|
||||
<!-- |
||||
Licensed to the Apache Software Foundation (ASF) under one or more |
||||
contributor license agreements. See the NOTICE file distributed with |
||||
this work for additional information regarding copyright ownership. |
||||
The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
(the "License"); you may not use this file except in compliance with |
||||
the License. You may obtain a copy of the License at |
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
||||
Unless required by applicable law or agreed to in writing, software |
||||
distributed under the License is distributed on an "AS IS" BASIS, |
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
See the License for the specific language governing permissions and |
||||
limitations under the License. |
||||
--> |
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> |
||||
<html> |
||||
<head> |
||||
</head> |
||||
<body> |
||||
<p>Interfaces and classes used by |
||||
the various implementations in the sub-packages.</p> |
||||
|
||||
<p>Definitive implementations of commonly used encoders and decoders.</p> |
||||
|
||||
<p>Codec currently comprises a modest set of utilities and a
simple framework for String encoding and decoding in three categories:
Binary Encoders, Language Encoders, and Network Encoders.</p>
||||
|
||||
<h4><a name="Common Encoders">Binary Encoders</a></h4> |
||||
|
||||
<table border="1" width="100%" cellspacing="2" cellpadding="3"> |
||||
<tbody> |
||||
<tr> |
||||
<td> |
||||
<a href="binary/Base64.html"> |
||||
com.fr.third.org.apache.commons.codec.binary.Base64</a> |
||||
</td> |
||||
<td> |
||||
Provides Base64 content-transfer-encoding as defined in |
||||
<a href="http://www.ietf.org/rfc/rfc2045.txt"> RFC 2045</a> |
||||
</td> |
||||
<td>Production</td> |
||||
</tr> |
||||
<tr> |
||||
<td> |
||||
<a href="binary/Hex.html"> |
||||
com.fr.third.org.apache.commons.codec.binary.Hex</a> |
||||
</td> |
||||
<td> |
||||
Converts an array of bytes into an array of characters |
||||
representing the hexadecimal values of each byte in order |
||||
</td> |
||||
<td>Production</td> |
||||
</tr> |
||||
</tbody> |
||||
</table> |
||||
<h4> |
||||
<a name="Language Encoders">Language Encoders</a> |
||||
</h4> |
||||
<p> |
||||
Codec contains a number of commonly used language and phonetic
encoders.
||||
</p> |
||||
<table border="1" width="100%" cellspacing="2" cellpadding="3"> |
||||
<tbody> |
||||
<tr> |
||||
<td> |
||||
<a href="#">com.fr.third.org.apache.commons.codec.language.Soundex</a> |
||||
</td> |
||||
<td>Implementation of the Soundex algorithm.</td> |
||||
<td>Production</td> |
||||
</tr> |
||||
<tr> |
||||
<td> |
||||
<a href="#">com.fr.third.org.apache.commons.codec.language.Metaphone</a> |
||||
</td> |
||||
<td>Implementation of the Metaphone algorithm.</td> |
||||
<td>Production</td> |
||||
</tr> |
||||
</tbody> |
||||
</table> |
||||
<h4><a name="Network_Encoders">Network Encoders</a></h4> |
||||
<h4> </h4> |
||||
<p>Codec contains network-related encoders.</p>
||||
<table border="1" width="100%" cellspacing="2" cellpadding="3"> |
||||
<tbody> |
||||
<tr> |
||||
<td> |
||||
<a href="#">com.fr.third.org.apache.commons.codec.net.URLCodec</a> |
||||
</td> |
||||
<td>Implements the 'www-form-urlencoded' encoding scheme.</td> |
||||
<td>Production</td> |
||||
</tr> |
||||
</tbody> |
||||
</table> |
||||
<br> |
||||
</body> |
||||
</html> |
Loading…
Reference in new issue