Updated EdDSA code

Source: https://github.com/str4d/ed25519-java Git commit: 58e4efadf972f4dc4f67c05152f82b49fb22bac6
2014-11-12 10:20:28 +00:00
parent 1a9fb381ed
commit 7a7ae77c83
11 changed files with 975 additions and 437 deletions
--- a/core/java/src/net/i2p/crypto/eddsa/Utils.java
+++ b/core/java/src/net/i2p/crypto/eddsa/Utils.java
@@ -31,6 +31,7 @@ public class Utils {
        for (int i = 0; i < 32; i++) {
            result |= b[i] ^ c[i];
        }
+
        return equal(result, 0);
    }

--- a/core/java/src/net/i2p/crypto/eddsa/math/Curve.java
+++ b/core/java/src/net/i2p/crypto/eddsa/math/Curve.java
@@ -27,8 +27,8 @@ public class Curve implements Serializable {
        this.d2 = this.d.add(this.d);
        this.I = I;

-        FieldElement zero = f.zero;
-        FieldElement one = f.one;
+        FieldElement zero = f.ZERO;
+        FieldElement one = f.ONE;
        zeroP2 = GroupElement.p2(this, zero, one, one);
        zeroP3 = GroupElement.p3(this, zero, one, one, zero);
        zeroPrecomp = GroupElement.precomp(this, one, one, zero);
--- a/core/java/src/net/i2p/crypto/eddsa/math/Encoding.java
+++ b/core/java/src/net/i2p/crypto/eddsa/math/Encoding.java
@@ -32,7 +32,7 @@ public abstract class Encoding {
    public abstract FieldElement decode(byte[] in);

    /**
-     * From the Ed25519 paper:
+     * From the Ed25519 paper:<br>
     * x is negative if the (b-1)-bit encoding of x is lexicographically larger
     * than the (b-1)-bit encoding of -x. If q is an odd prime and the encoding
     * is the little-endian representation of {0, 1,..., q-1} then the negative
--- a/core/java/src/net/i2p/crypto/eddsa/math/Field.java
+++ b/core/java/src/net/i2p/crypto/eddsa/math/Field.java
@@ -12,12 +12,12 @@ import java.io.Serializable;
 public class Field implements Serializable {
    private static final long serialVersionUID = 8746587465875676L;

-    public final FieldElement zero;
-    public final FieldElement one;
-    public final FieldElement two;
-    public final FieldElement four;
-    public final FieldElement five;
-    public final FieldElement eight;
+    public final FieldElement ZERO;
+    public final FieldElement ONE;
+    public final FieldElement TWO;
+    public final FieldElement FOUR;
+    public final FieldElement FIVE;
+    public final FieldElement EIGHT;

    private final int b;
    private final FieldElement q;
@@ -39,16 +39,16 @@ public class Field implements Serializable {
        this.q = fromByteArray(q);

        // Set up constants
-        zero = fromByteArray(Constants.ZERO);
-        one = fromByteArray(Constants.ONE);
-        two = fromByteArray(Constants.TWO);
-        four = fromByteArray(Constants.FOUR);
-        five = fromByteArray(Constants.FIVE);
-        eight = fromByteArray(Constants.EIGHT);
+        ZERO = fromByteArray(Constants.ZERO);
+        ONE = fromByteArray(Constants.ONE);
+        TWO = fromByteArray(Constants.TWO);
+        FOUR = fromByteArray(Constants.FOUR);
+        FIVE = fromByteArray(Constants.FIVE);
+        EIGHT = fromByteArray(Constants.EIGHT);

        // Precompute values
-        qm2 = this.q.subtract(two);
-        qm5d8 = this.q.subtract(five).divide(eight);
+        qm2 = this.q.subtract(TWO);
+        qm5d8 = this.q.subtract(FIVE).divide(EIGHT);
    }

    public FieldElement fromByteArray(byte[] x) {
--- a/core/java/src/net/i2p/crypto/eddsa/math/FieldElement.java
+++ b/core/java/src/net/i2p/crypto/eddsa/math/FieldElement.java
@@ -9,6 +9,9 @@ public abstract class FieldElement {
    protected final Field f;

    public FieldElement(Field f) {
+        if (null == f) {
+            throw new IllegalArgumentException("field cannot be null");
+        }
        this.f = f;
    }

@@ -29,13 +32,13 @@ public abstract class FieldElement {
    public abstract FieldElement add(FieldElement val);

    public FieldElement addOne() {
-        return add(f.one);
+        return add(f.ONE);
    }

    public abstract FieldElement subtract(FieldElement val);

    public FieldElement subtractOne() {
-        return subtract(f.one);
+        return subtract(f.ONE);
    }

    public abstract FieldElement negate();
--- a/core/java/src/net/i2p/crypto/eddsa/math/GroupElement.java
+++ b/core/java/src/net/i2p/crypto/eddsa/math/GroupElement.java
--- a/core/java/src/net/i2p/crypto/eddsa/math/ScalarOps.java
+++ b/core/java/src/net/i2p/crypto/eddsa/math/ScalarOps.java
@@ -8,7 +8,8 @@ package net.i2p.crypto.eddsa.math;
 public interface ScalarOps {
    /**
     * Reduce the given scalar mod l.
-     * From the Ed25519 paper:
+     * <p>
+     * From the Ed25519 paper:<br>
     * Here we interpret 2b-bit strings in little-endian form as integers in
     * {0, 1,..., 2^(2b)-1}.
     * @param s
--- a/core/java/src/net/i2p/crypto/eddsa/math/bigint/BigIntegerLittleEndianEncoding.java
+++ b/core/java/src/net/i2p/crypto/eddsa/math/bigint/BigIntegerLittleEndianEncoding.java
@@ -60,7 +60,7 @@ public class BigIntegerLittleEndianEncoding extends Encoding implements Serializ
    }

    /**
-     * From the Ed25519 paper:
+     * From the Ed25519 paper:<br>
     * x is negative if the (b-1)-bit encoding of x is lexicographically larger
     * than the (b-1)-bit encoding of -x. If q is an odd prime and the encoding
     * is the little-endian representation of {0, 1,..., q-1} then the negative
--- a/core/java/src/net/i2p/crypto/eddsa/math/ed25519/Ed25519FieldElement.java
+++ b/core/java/src/net/i2p/crypto/eddsa/math/ed25519/Ed25519FieldElement.java
@@ -1,13 +1,18 @@
 package net.i2p.crypto.eddsa.math.ed25519;

 import net.i2p.crypto.eddsa.Utils;
-import net.i2p.crypto.eddsa.math.Field;
-import net.i2p.crypto.eddsa.math.FieldElement;
+import net.i2p.crypto.eddsa.math.*;
+
+import java.util.Arrays;

 /**
+ * Class to represent a field element of the finite field with p=2^255-19 elements.
+ * <p>
 * An element t, entries t[0]...t[9], represents the integer
 * t[0]+2^26 t[1]+2^51 t[2]+2^77 t[3]+2^102 t[4]+...+2^230 t[9].
 * Bounds on each t[i] vary depending on context.
+ * <p>
+ * Reviewed/commented by Bloody Rookie (nemproject@gmx.de)
 */
 public class Ed25519FieldElement extends FieldElement {
    /**
@@ -15,6 +20,12 @@ public class Ed25519FieldElement extends FieldElement {
     */
    final int[] t;

+    /**
+     * Creates a field element.
+     *
+     * @param f The underlying field, must be the finite field with p = 2^255 - 19 elements
+     * @param t The 2^25.5 bit representation of the field element.
+     */
    public Ed25519FieldElement(Field f, int[] t) {
        super(f);
        if (t.length != 10)
@@ -24,21 +35,32 @@ public class Ed25519FieldElement extends FieldElement {

    private static final byte[] ZERO = new byte[32];

+    /**
+     * Gets a value indicating whether or not the field element is non-zero.
+     *
+     * @return true if it is non-zero, false otherwise.
+     */
    public boolean isNonZero() {
-        byte[] s = toByteArray();
+        final byte[] s = toByteArray();
        return Utils.equal(s, ZERO) == 0;
    }

    /**
     * h = f + g
-     * Can overlap h with f or g.
-     *
+     * <p>
+     * TODO-CR BR: h is allocated via new, probably not a good idea. Do we need the copying into temp variables if we do that?
+     * <p>
     * Preconditions:
-     *    |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
-     *    |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
-     *
+     * <p><ul>
+     * <li>|f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+     * <li>|g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+     * </ul><p>
     * Postconditions:
-     *    |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+     * <p><ul>
+     * <li>|h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+     *
+     * @param val The field element to add.
+     * @return The field element this + val.
     */
    public FieldElement add(FieldElement val) {
        int[] g = ((Ed25519FieldElement)val).t;
@@ -51,14 +73,22 @@ public class Ed25519FieldElement extends FieldElement {

    /**
     * h = f - g
+     * <p>
     * Can overlap h with f or g.
-     *
+     * <p>
+     * TODO-CR BR: See above.
+     * <p>
     * Preconditions:
-     *    |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
-     *    |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
-     *
+     * <p><ul>
+     * <li>|f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+     * <li>|g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+     * </ul><p>
     * Postconditions:
-     *    |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+     * <p><ul>
+     * <li>|h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+     *
+     * @param val The field element to subtract.
+     * @return The field element this - val.
     **/
    public FieldElement subtract(FieldElement val) {
        int[] g = ((Ed25519FieldElement)val).t;
@@ -71,12 +101,18 @@ public class Ed25519FieldElement extends FieldElement {

    /**
     * h = -f
-     *
+     * <p>
+     * TODO-CR BR: see above.
+     * <p>
     * Preconditions:
-     *    |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
-     *
+     * <p><ul>
+     * <li>|f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+     * </ul><p>
     * Postconditions:
-     *    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+     * <p><ul>
+     * <li>|h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+     *
+     * @return The field element (-1) * this.
     */
    public FieldElement negate() {
        int[] h = new int[10];
@@ -87,32 +123,42 @@ public class Ed25519FieldElement extends FieldElement {
    }

    /**
-     * h = f * g Can overlap h with f or g.
-     * 
-     * Preconditions: |f| bounded by
-     * 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. |g| bounded by
+     * h = f * g
+     * <p>
+     * Can overlap h with f or g.
+     * <p>
+     * Preconditions:
+     * <p><ul>
+     * <li>|f| bounded by
     * 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
-     * 
-     * Postconditions: |h| bounded by
+     * <li>|g| bounded by
+     * 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
+     * </ul><p>
+     * Postconditions:
+     * <p><ul>
+     * <li>|h| bounded by
     * 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
-     *
+     * </ul><p>
     * Notes on implementation strategy:
-     *
+     * <p>
     * Using schoolbook multiplication. Karatsuba would save a little in some
     * cost models.
-     *
+     * <p>
     * Most multiplications by 2 and 19 are 32-bit precomputations; cheaper than
     * 64-bit postcomputations.
-     *
+     * <p>
     * There is one remaining multiplication by 19 in the carry chain; one *19
     * precomputation can be merged into this, but the resulting data flow is
     * considerably less clean.
-     *
+     * <p>
     * There are 12 carries below. 10 of them are 2-way parallelizable and
     * vectorizable. Can get away with 11 carries, but then data flow is much
     * deeper.
-     *
+     * <p>
     * With tighter constraints on inputs can squeeze carries into int32.
+     *
+     * @param val The field element to multiply.
+     * @return The (reasonably reduced) field element this * val.
     */
    public FieldElement multiply(FieldElement val) {
        int[] g = ((Ed25519FieldElement)val).t;
@@ -230,16 +276,28 @@ public class Ed25519FieldElement extends FieldElement {
        long f9g7_38 = f9_2 * (long) g7_19;
        long f9g8_19 = t[9] * (long) g8_19;
        long f9g9_38 = f9_2 * (long) g9_19;
-        long h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38;
-        long h1 = f0g1+f1g0   +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19;
-        long h2 = f0g2+f1g1_2 +f2g0   +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38;
-        long h3 = f0g3+f1g2   +f2g1   +f3g0   +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19;
-        long h4 = f0g4+f1g3_2 +f2g2   +f3g1_2 +f4g0   +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38;
-        long h5 = f0g5+f1g4   +f2g3   +f3g2   +f4g1   +f5g0   +f6g9_19+f7g8_19+f8g7_19+f9g6_19;
-        long h6 = f0g6+f1g5_2 +f2g4   +f3g3_2 +f4g2   +f5g1_2 +f6g0   +f7g9_38+f8g8_19+f9g7_38;
-        long h7 = f0g7+f1g6   +f2g5   +f3g4   +f4g3   +f5g2   +f6g1   +f7g0   +f8g9_19+f9g8_19;
-        long h8 = f0g8+f1g7_2 +f2g6   +f3g5_2 +f4g4   +f5g3_2 +f6g2   +f7g1_2 +f8g0   +f9g9_38;
-        long h9 = f0g9+f1g8   +f2g7   +f3g6   +f4g5   +f5g4   +f6g3   +f7g2   +f8g1   +f9g0   ;
+
+        /**
+         * Remember: 2^255 congruent 19 modulo p.
+         * h = h0 * 2^0 + h1 * 2^26 + h2 * 2^(26+25) + h3 * 2^(26+25+26) + ... + h9 * 2^(5*26+4*25).
+         * So to get the real number we would have to multiply the coefficients with the corresponding powers of 2.
+         * To get an idea what is going on below, look at the calculation of h0:
+         * h0 is the coefficient to the power 2^0 so it collects (sums) all products that have the power 2^0.
+         * f0 * g0 really is f0 * 2^0 * g0 * 2^0 = (f0 * g0) * 2^0.
+         * f1 * g9 really is f1 * 2^26 * g9 * 2^230 = f1 * g9 * 2^256 = 2 * f1 * g9 * 2^255 congruent 2 * 19 * f1 * g9 * 2^0 modulo p.
+         * f2 * g8 really is f2 * 2^51 * g8 * 2^204 = f2 * g8 * 2^255 congruent 19 * f2 * g8 * 2^0 modulo p.
+         * and so on...
+         */
+        long h0 = f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38;
+        long h1 = f0g1 + f1g0    + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19;
+        long h2 = f0g2 + f1g1_2  + f2g0    + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38;
+        long h3 = f0g3 + f1g2    + f2g1    + f3g0    + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19;
+        long h4 = f0g4 + f1g3_2  + f2g2    + f3g1_2  + f4g0    + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38;
+        long h5 = f0g5 + f1g4    + f2g3    + f3g2    + f4g1    + f5g0    + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19;
+        long h6 = f0g6 + f1g5_2  + f2g4    + f3g3_2  + f4g2    + f5g1_2  + f6g0    + f7g9_38 + f8g8_19 + f9g7_38;
+        long h7 = f0g7 + f1g6    + f2g5    + f3g4    + f4g3    + f5g2    + f6g1    + f7g0    + f8g9_19 + f9g8_19;
+        long h8 = f0g8 + f1g7_2  + f2g6    + f3g5_2  + f4g4    + f5g3_2  + f6g2    + f7g1_2  + f8g0    + f9g9_38;
+        long h9 = f0g9 + f1g8    + f2g7    + f3g6    + f4g5    + f5g4    + f6g3    + f7g2    + f8g1    + f9g0;
        long carry0;
        long carry1;
        long carry2;
@@ -317,16 +375,21 @@ public class Ed25519FieldElement extends FieldElement {

    /**
     * h = f * f
+     * <p>
     * Can overlap h with f.
-     *
+     * <p>
     * Preconditions:
-     *    |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
-     *
+     * <p><ul>
+     * <li>|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
+     * </ul><p>
     * Postconditions:
-     *    |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
-     *
-     * See {@link Ed25519FieldElement#multiply(FieldElement)} for discussion
+     * <p><ul>
+     * <li>|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
+     * </ul><p>
+     * See {@link #multiply(FieldElement)} for discussion
     * of implementation strategy.
+     *
+     * @return The (reasonably reduced) square of this field element.
     */
    public FieldElement square() {
        int f0 = t[0];
@@ -407,16 +470,21 @@ public class Ed25519FieldElement extends FieldElement {
        long f8f8_19 = f8   * (long) f8_19;
        long f8f9_38 = f8   * (long) f9_38;
        long f9f9_38 = f9   * (long) f9_38;
-        long h0 = f0f0  +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
-        long h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
-        long h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
-        long h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
-        long h4 = f0f4_2+f1f3_4 +f2f2   +f5f9_76+f6f8_38+f7f7_38;
-        long h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
-        long h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
-        long h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
-        long h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4   +f9f9_38;
-        long h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
+
+        /**
+         * Same procedure as in multiply, but this time we have a higher symmetry leading to fewer summands.
+         * e.g. f1f9_76 really stands for f1 * 2^26 * f9 * 2^230 + f9 * 2^230 * f1 * 2^26 congruent 2 * 2 * 19 * f1 * f9 * 2^0 modulo p.
+         */
+        long h0 = f0f0   + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
+        long h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
+        long h2 = f0f2_2 + f1f1_2  + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
+        long h3 = f0f3_2 + f1f2_2  + f4f9_38 + f5f8_38 + f6f7_38;
+        long h4 = f0f4_2 + f1f3_4  + f2f2    + f5f9_76 + f6f8_38 + f7f7_38;
+        long h5 = f0f5_2 + f1f4_2  + f2f3_2  + f6f9_38 + f7f8_38;
+        long h6 = f0f6_2 + f1f5_4  + f2f4_2  + f3f3_2  + f7f9_76 + f8f8_19;
+        long h7 = f0f7_2 + f1f6_2  + f2f5_2  + f3f4_2  + f8f9_38;
+        long h8 = f0f8_2 + f1f7_4  + f2f6_2  + f3f5_4  + f4f4    + f9f9_38;
+        long h9 = f0f9_2 + f1f8_2  + f2f7_2  + f3f6_2  + f4f5_2;
        long carry0;
        long carry1;
        long carry2;
@@ -463,16 +531,21 @@ public class Ed25519FieldElement extends FieldElement {

    /**
     * h = 2 * f * f
+     * <p>
     * Can overlap h with f.
-     *
+     * <p>
     * Preconditions:
-     *    |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
-     *
+     * <p><ul>
+     * <li>|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
+     * </ul><p>
     * Postconditions:
-     *    |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
-     *
-     * See {@link Ed25519FieldElement#multiply(FieldElement)} for discussion
+     * <p><ul>
+     * <li>|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
+     * </ul><p>
+     * See {@link #multiply(FieldElement)} for discussion
     * of implementation strategy.
+     *
+     * @return The (reasonably reduced) square of this field element times 2.
     */
    public FieldElement squareAndDouble() {
        int f0 = t[0];
@@ -553,16 +626,16 @@ public class Ed25519FieldElement extends FieldElement {
        long f8f8_19 = f8   * (long) f8_19;
        long f8f9_38 = f8   * (long) f9_38;
        long f9f9_38 = f9   * (long) f9_38;
-        long h0 = f0f0  +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
-        long h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
-        long h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
-        long h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
-        long h4 = f0f4_2+f1f3_4 +f2f2   +f5f9_76+f6f8_38+f7f7_38;
-        long h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
-        long h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
-        long h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
-        long h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4   +f9f9_38;
-        long h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
+        long h0 = f0f0   + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
+        long h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
+        long h2 = f0f2_2 + f1f1_2  + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
+        long h3 = f0f3_2 + f1f2_2  + f4f9_38 + f5f8_38 + f6f7_38;
+        long h4 = f0f4_2 + f1f3_4  + f2f2    + f5f9_76 + f6f8_38 + f7f7_38;
+        long h5 = f0f5_2 + f1f4_2  + f2f3_2  + f6f9_38 + f7f8_38;
+        long h6 = f0f6_2 + f1f5_4  + f2f4_2  + f3f3_2  + f7f9_76 + f8f8_19;
+        long h7 = f0f7_2 + f1f6_2  + f2f5_2  + f3f4_2  + f8f9_38;
+        long h8 = f0f8_2 + f1f7_4  + f2f6_2  + f3f5_4  + f4f4    + f9f9_38;
+        long h9 = f0f9_2 + f1f8_2  + f2f7_2  + f3f6_2  + f4f5_2;
        long carry0;
        long carry1;
        long carry2;
@@ -618,120 +691,162 @@ public class Ed25519FieldElement extends FieldElement {
        return new Ed25519FieldElement(f, h);
    }

+    /**
+     * Invert this field element.
+     * <p>
+     * The inverse is found via Fermat's little theorem:<br>
+     * a^p congruent a mod p and therefore a^(p-2) congruent a^-1 mod p
+     *
+     * @return The inverse of this field element.
+     */
    public FieldElement invert() {
        FieldElement t0, t1, t2, t3;

-        // z2 = z1^2^1
+        // 2 == 2 * 1
        t0 = square();
+
+        // TODO -CR BR: What is this? Is the author superstitious?
        for (int i = 1; i < 1; ++i) { // Don't remove this
            t0 = t0.square();
        }

-        // z8 = z2^2^2;
+        // 4 == 2 * 2
        t1 = t0.square();
+
+        // 8 == 2 * 4
        for (int i = 1; i < 2; ++i) {
            t1 = t1.square();
        }

-        // z9 = z1*z8
+        // 9 == 8 + 1
        t1 = multiply(t1);

-        // z11 = z2*z9
+        // 11 == 9 + 2
        t0 = t0.multiply(t1);

-        // z22 = z11^2^1
+        // 22 == 2 * 11
        t2 = t0.square();
+
+        // TODO -CR BR: see above
        for (int i = 1; i < 1; ++i) { // Don't remove this
            t2 = t2.square();
        }

-        // z_5_0 = z9*z22
+        // 31 == 22 + 9
        t1 = t1.multiply(t2);

-        // z_10_5 = z_5_0^2^5
+        // 2^6 - 2^1
        t2 = t1.square();
+
+        // 2^10 - 2^5
        for (int i = 1; i < 5; ++i) {
            t2 = t2.square();
        }

-        // z_10_0 = z_10_5*z_5_0
+        // 2^10 - 2^0
        t1 = t2.multiply(t1);

-        // z_20_10 = z_10_0^2^10
+        // 2^11 - 2^1
        t2 = t1.square();
+
+        // 2^20 - 2^10
        for (int i = 1; i < 10; ++i) {
            t2 = t2.square();
        }

-        // z_20_0 = z_20_10*z_10_0
+        // 2^20 - 2^0
        t2 = t2.multiply(t1);

-        // z_40_20 = z_20_0^2^20
+        // 2^21 - 2^1
        t3 = t2.square();
+
+        // 2^40 - 2^20
        for (int i = 1; i < 20; ++i) {
            t3 = t3.square();
        }

-        // z_40_0 = z_40_20*z_20_0
+        // 2^40 - 2^0
        t2 = t3.multiply(t2);

-        // z_50_10 = z_40_0^2^10
+        // 2^41 - 2^1
        t2 = t2.square();
+
+        // 2^50 - 2^10
        for (int i = 1; i < 10; ++i) {
            t2 = t2.square();
        }

-        // z_50_0 = z_50_10*z_10_0
+        // 2^50 - 2^0
        t1 = t2.multiply(t1);

-        // z_100_50 = z_50_0^2^50
+        // 2^51 - 2^1
        t2 = t1.square();
+
+        // 2^100 - 2^50
        for (int i = 1; i < 50; ++i) {
            t2 = t2.square();
        }

-        // z_100_0 = z_100_50*z_50_0
+        // 2^100 - 2^0
        t2 = t2.multiply(t1);

-        // z_200_100 = z_100_0^2^100
+        // 2^101 - 2^1
        t3 = t2.square();
+
+        // 2^200 - 2^100
        for (int i = 1; i < 100; ++i) {
            t3 = t3.square();
        }

-        // z_200_0 = z_200_100*z_100_0
+        // 2^200 - 2^0
        t2 = t3.multiply(t2);

-        // z_250_50 = z_200_0^2^50
+        // 2^201 - 2^1
        t2 = t2.square();
+
+        // 2^250 - 2^50
        for (int i = 1; i < 50; ++i) {
            t2 = t2.square();
        }

-        // z_250_0 = z_250_50*z_50_0
+        // 2^250 - 2^0
        t1 = t2.multiply(t1);

-        // z_255_5 = z_250_0^2^5
+        // 2^251 - 2^1
        t1 = t1.square();
+
+        // 2^255 - 2^5
        for (int i = 1; i < 5; ++i) {
            t1 = t1.square();
        }

-        // z_255_21 = z_255_5*z11
+        // 2^255 - 21
        return t1.multiply(t0);
    }

+    /**
+     * Gets this field element to the power of (2^252 - 3).
+     * This is a helper function for calculating the square root.
+     * <p>
+     * TODO-CR BR: I think it makes sense to have a sqrt function.
+     *
+     * @return This field element to the power of (2^252 - 3).
+     */
    public FieldElement pow22523() {
        FieldElement t0, t1, t2;

-        // z2 = z1^2^1
+        // 2 == 2 * 1
        t0 = square();
+
+        // TODO -CR BR: see invert
        for (int i = 1; i < 1; ++i) { // Don't remove this
            t0 = t0.square();
        }

-        // z8 = z2^2^2;
+        // 4 == 2 * 2
        t1 = t0.square();
+
+        // 8 == 2 * 4
        for (int i = 1; i < 2; ++i) {
            t1 = t1.square();
        }
@@ -739,98 +854,112 @@ public class Ed25519FieldElement extends FieldElement {
        // z9 = z1*z8
        t1 = multiply(t1);

-        // z11 = z2*z9
+        // 11 == 9 + 2
        t0 = t0.multiply(t1);

-        // z22 = z11^2^1
+        // 22 == 2 * 11
        t0 = t0.square();
+
+        // TODO -CR BR: see above
        for (int i = 1; i < 1; ++i) { // Don't remove this
            t0 = t0.square();
        }

-        // z_5_0 = z9*z22
+        // 31 == 22 + 9
        t0 = t1.multiply(t0);

-        // z_10_5 = z_5_0^2^5
+        // 2^6 - 2^1
        t1 = t0.square();
+
+        // 2^10 - 2^5
        for (int i = 1; i < 5; ++i) {
            t1 = t1.square();
        }

-        // z_10_0 = z_10_5*z_5_0
+        // 2^10 - 2^0
        t0 = t1.multiply(t0);

-        // z_20_10 = z_10_0^2^10
+        // 2^11 - 2^1
        t1 = t0.square();
+
+        // 2^20 - 2^10
        for (int i = 1; i < 10; ++i) {
            t1 = t1.square();
        }

-        // z_20_0 = z_20_10*z_10_0
+        // 2^20 - 2^0
        t1 = t1.multiply(t0);

-        // z_40_20 = z_20_0^2^20
+        // 2^21 - 2^1
        t2 = t1.square();
+
+        // 2^40 - 2^20
        for (int i = 1; i < 20; ++i) {
            t2 = t2.square();
        }

-        // z_40_0 = z_40_20*z_20_0
+        // 2^40 - 2^0
        t1 = t2.multiply(t1);

-        // z_50_10 = z_40_0^2^10
+        // 2^41 - 2^1
        t1 = t1.square();
+
+        // 2^50 - 2^10
        for (int i = 1; i < 10; ++i) {
            t1 = t1.square();
        }

-        // z_50_0 = z_50_10*z_10_0
+        // 2^50 - 2^0
        t0 = t1.multiply(t0);

-        // z_100_50 = z_50_0^2^50
+        // 2^51 - 2^1
        t1 = t0.square();
+
+        // 2^100 - 2^50
        for (int i = 1; i < 50; ++i) {
            t1 = t1.square();
        }

-        // z_100_0 = z_100_50*z_50_0
+        // 2^100 - 2^0
        t1 = t1.multiply(t0);

-        // z_200_100 = z_100_0^2^100
+        // 2^101 - 2^1
        t2 = t1.square();
+
+        // 2^200 - 2^100
        for (int i = 1; i < 100; ++i) {
            t2 = t2.square();
        }

-        // z_200_0 = z_200_100*z_100_0
+        // 2^200 - 2^0
        t1 = t2.multiply(t1);

-        // z_250_50 = z_200_0^2^50
+        // 2^201 - 2^1
        t1 = t1.square();
+
+        // 2^250 - 2^50
        for (int i = 1; i < 50; ++i) {
            t1 = t1.square();
        }

-        // z_250_0 = z_250_50*z_50_0
+        // 2^250 - 2^0
        t0 = t1.multiply(t0);

-        // z_252_2 = z_250_0^2^2
+        // 2^251 - 2^1
        t0 = t0.square();
+
+        // 2^252 - 2^2
        for (int i = 1; i < 2; ++i) {
            t0 = t0.square();
        }

-        // z_252_3 = z_252_2*z1
+        // 2^252 - 3
        return multiply(t0);
    }

    @Override
    public int hashCode() {
-        int rv = 0;
-        for (int i = 0; i < 10; i++) {
-            rv ^= t[i];
-        }
-        return rv;
+        return Arrays.hashCode(t);
    }

    @Override
--- a/core/java/src/net/i2p/crypto/eddsa/math/ed25519/Ed25519LittleEndianEncoding.java
+++ b/core/java/src/net/i2p/crypto/eddsa/math/ed25519/Ed25519LittleEndianEncoding.java
@@ -1,32 +1,48 @@
 package net.i2p.crypto.eddsa.math.ed25519;

-import net.i2p.crypto.eddsa.math.Encoding;
-import net.i2p.crypto.eddsa.math.FieldElement;
+import net.i2p.crypto.eddsa.math.*;

+/**
+ * Helper class for encoding/decoding from/to the 32 byte representation.
+ * <p>
+ * Reviewed/commented by Bloody Rookie (nemproject@gmx.de)
+ */
 public class Ed25519LittleEndianEncoding extends Encoding {
    /**
-     * Preconditions:<br>
-     *   |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.<br><br>
+     * Encodes a given field element in its 32 byte representation. This is done in TWO steps.
+     * Step 1: Reduce the value of the field element modulo p.
+     * Step 2: Convert the field element to the 32 byte representation.
+     * <p>
+     * The idea for the modulo p reduction algorithm is as follows:
+     * <p>
+     * Assumption:
+     * <p><ul>
+     * <li>p = 2^255 - 19
+     * <li>h = h0 + 2^26 * h1 + 2^(26+25) * h2 + ... + 2^230 * h9 where 0 <= |hi| < 2^27 for all i=0,...,9.
+     * <li>h congruent r modulo p, i.e. h = r + q * p for some suitable 0 <= r < p and an integer q.
+     * </ul><p>
+     * Then q = [2^-255 * (h + 19 * 2^-25 * h9 + 1/2)] where [x] = floor(x).
+     * <p>
+     * Proof:
+     * <p>
+     * We begin with some very raw estimation for the bounds of some expressions:
+     * <pre>|h| < 2^230 * 2^30 = 2^260 ==> |r + q * p| < 2^260 ==> |q| < 2^10.
+     * ==> -1/4 <= a := 19^2 * 2^-255 * q < 1/4.
+     * |h - 2^230 * h9| = |h0 + ... + 2^204 * h8| < 2^204 * 2^30 = 2^234.
+     * ==> -1/4 <= b := 19 * 2^-255 * (h - 2^230 * h9) < 1/4</pre>
+     * Therefore 0 < 1/2 - a - b < 1.
+     * <p>
+     * Set x := r + 19 * 2^-255 * r + 1/2 - a - b then
+     * 0 <= x < 2^255 - 20 + 19 + 1 = 2^255 ==> 0 <= 2^-255 * x < 1. Since q is an integer we have
     *
-     * Write p=2^255-19; q=floor(h/p).<br>
-     * Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
-     * <br><br>
-     * Proof:<br>
-     *   Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.<br>
-     *   Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
-     *   <br><br>
-     *   Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).<br>
-     *   Then 0 < y < 1.
-     *   <br><br>
-     *   Write r=h-pq.<br>
-     *   Have 0 <= r <= p-1=2^255-20.<br>
-     *   Thus 0 <= r+19(2^-255)r < r+19(2^-255)2^255 <= 2^255-1.
-     *   <br><br>
-     *   Write x=r+19(2^-255)r+y.<br>
-     *   Then 0 < x < 2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
-     *   <br><br>
-     *   Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))<br>
-     *   so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
+     * <pre>[q + 2^-255 * x] = q        (1)</pre>
+     * <p>
+     * Have a closer look at x:
+     * <pre>x = h - q * (2^255 - 19) + 19 * 2^-255 * (h - q * (2^255 - 19)) + 1/2 - 19^2 * 2^-255 * q - 19 * 2^-255 * (h - 2^230 * h9)
+     *   = h - q * 2^255 + 19 * q + 19 * 2^-255 * h - 19 * q + 19^2 * 2^-255 * q + 1/2 - 19^2 * 2^-255 * q - 19 * 2^-255 * h + 19 * 2^-25 * h9
+     *   = h + 19 * 2^-25 * h9 + 1/2 - q * 2^255.</pre>
+     * <p>
+     * Inserting the expression for x into (1) we get the desired expression for q.
     */
    public byte[] encode(FieldElement x) {
        int[] h = ((Ed25519FieldElement)x).t;
@@ -52,6 +68,8 @@ public class Ed25519LittleEndianEncoding extends Encoding {
        int carry8;
        int carry9;

+        // Step 1:
+        // Calculate q
        q = (19 * h9 + (((int) 1) << 24)) >> 25;
        q = (h0 + q) >> 26;
        q = (h1 + q) >> 25;
@@ -64,9 +82,9 @@ public class Ed25519LittleEndianEncoding extends Encoding {
        q = (h8 + q) >> 26;
        q = (h9 + q) >> 25;

-        /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
+        // r = h - q * p = h - 2^255 * q + 19 * q
+        // First add 19 * q then discard the bit 255
        h0 += 19 * q;
-        /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */

        carry0 = h0 >> 26; h1 += carry0; h0 -= carry0 << 26;
        carry1 = h1 >> 25; h2 += carry1; h1 -= carry1 << 25;
@@ -78,15 +96,8 @@ public class Ed25519LittleEndianEncoding extends Encoding {
        carry7 = h7 >> 25; h8 += carry7; h7 -= carry7 << 25;
        carry8 = h8 >> 26; h9 += carry8; h8 -= carry8 << 26;
        carry9 = h9 >> 25;               h9 -= carry9 << 25;
-                        /* h10 = carry9 */
-
-        /*
-        Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
-        Have h0+...+2^230 h9 between 0 and 2^255-1;
-        evidently 2^255 h10-2^255 q = 0.
-        Goal: Output h0+...+2^230 h9.
-        */

+        // Step 2 (straight forward conversion):
        byte[] s = new byte[32];
        s[0] = (byte) h0;
        s[1] = (byte) (h0 >> 8);
@@ -139,7 +150,10 @@ public class Ed25519LittleEndianEncoding extends Encoding {
    }

    /**
-     * Ignores top bit.
+     * Decodes a field element from its 32 byte encoding into its 10 limb 2^25.5 representation.
+     *
+     * @param in The 32 byte representation.
+     * @return The field element in its 2^25.5 bit representation.
     */
    public FieldElement decode(byte[] in) {
        long h0 = load_4(in, 0);
@@ -151,7 +165,7 @@ public class Ed25519LittleEndianEncoding extends Encoding {
        long h6 = load_3(in, 20) << 7;
        long h7 = load_3(in, 23) << 5;
        long h8 = load_3(in, 26) << 4;
-        long h9 = (load_3(in, 29) & 8388607) << 2;
+        long h9 = (load_3(in, 29) & 0x7FFFFF) << 2;
        long carry0;
        long carry1;
        long carry2;
@@ -163,6 +177,7 @@ public class Ed25519LittleEndianEncoding extends Encoding {
        long carry8;
        long carry9;

+        // Remember: 2^255 congruent 19 modulo p
        carry9 = (h9 + (long) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
        carry1 = (h1 + (long) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
        carry3 = (h3 + (long) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
@@ -190,11 +205,15 @@ public class Ed25519LittleEndianEncoding extends Encoding {
    }

    /**
+     * Is the FieldElement negative in this encoding?
+     * <p>
     * Return true if x is in {1,3,5,...,q-2}<br>
-     * Return false if x is in {0,2,4,...,q-1}<br><br>
+     * Return false if x is in {0,2,4,...,q-1}
+     * <p>
+     * Preconditions:
+     * <p><ul>
+     * <li>|x| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.</ul>
     *
-     * Preconditions:<br>
-     *    |x| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
     * @return true if x is in {1,3,5,...,q-2}, false otherwise.
     */
    public boolean isNegative(FieldElement x) {
--- a/core/java/src/net/i2p/crypto/eddsa/math/ed25519/Ed25519ScalarOps.java
+++ b/core/java/src/net/i2p/crypto/eddsa/math/ed25519/Ed25519ScalarOps.java
@@ -4,40 +4,51 @@ import net.i2p.crypto.eddsa.math.ScalarOps;
 import static net.i2p.crypto.eddsa.math.ed25519.Ed25519LittleEndianEncoding.load_3;
 import static net.i2p.crypto.eddsa.math.ed25519.Ed25519LittleEndianEncoding.load_4;

+/**
+ * Class for reducing a huge integer modulo the group order q and
+ * doing a combined multiply plus add plus reduce operation.
+ * <p>
+ * q = 2^252 + 27742317777372353535851937790883648493.
+ * <p>
+ * Reviewed/commented by Bloody Rookie (nemproject@gmx.de)
+ */
 public class Ed25519ScalarOps implements ScalarOps {

    /**
-     * Input:<br>
-     *   s[0]+256*s[1]+...+256^63*s[63] = s<br><br>
-     *
-     * Output:<br>
-     *   s[0]+256*s[1]+...+256^31*s[31] = s mod l<br>
-     *   where l = 2^252 + 27742317777372353535851937790883648493.
+     * Reduction modulo the group order q.
+     * <p>
+     * Input:
+     *   s[0]+256*s[1]+...+256^63*s[63] = s
+     * <p>
+     * Output:
+     *   s[0]+256*s[1]+...+256^31*s[31] = s mod q
+     *   where q = 2^252 + 27742317777372353535851937790883648493.
     */
    public byte[] reduce(byte[] s) {
-        long s0 = 2097151 & load_3(s, 0);
-        long s1 = 2097151 & (load_4(s, 2) >> 5);
-        long s2 = 2097151 & (load_3(s, 5) >> 2);
-        long s3 = 2097151 & (load_4(s, 7) >> 7);
-        long s4 = 2097151 & (load_4(s, 10) >> 4);
-        long s5 = 2097151 & (load_3(s, 13) >> 1);
-        long s6 = 2097151 & (load_4(s, 15) >> 6);
-        long s7 = 2097151 & (load_3(s, 18) >> 3);
-        long s8 = 2097151 & load_3(s, 21);
-        long s9 = 2097151 & (load_4(s, 23) >> 5);
-        long s10 = 2097151 & (load_3(s, 26) >> 2);
-        long s11 = 2097151 & (load_4(s, 28) >> 7);
-        long s12 = 2097151 & (load_4(s, 31) >> 4);
-        long s13 = 2097151 & (load_3(s, 34) >> 1);
-        long s14 = 2097151 & (load_4(s, 36) >> 6);
-        long s15 = 2097151 & (load_3(s, 39) >> 3);
-        long s16 = 2097151 & load_3(s, 42);
-        long s17 = 2097151 & (load_4(s, 44) >> 5);
-        long s18 = 2097151 & (load_3(s, 47) >> 2);
-        long s19 = 2097151 & (load_4(s, 49) >> 7);
-        long s20 = 2097151 & (load_4(s, 52) >> 4);
-        long s21 = 2097151 & (load_3(s, 55) >> 1);
-        long s22 = 2097151 & (load_4(s, 57) >> 6);
+        // s0,..., s22 have 21 bits, s23 has 29 bits
+        long s0 = 0x1FFFFF & load_3(s, 0);
+        long s1 = 0x1FFFFF & (load_4(s, 2) >> 5);
+        long s2 = 0x1FFFFF & (load_3(s, 5) >> 2);
+        long s3 = 0x1FFFFF & (load_4(s, 7) >> 7);
+        long s4 = 0x1FFFFF & (load_4(s, 10) >> 4);
+        long s5 = 0x1FFFFF & (load_3(s, 13) >> 1);
+        long s6 = 0x1FFFFF & (load_4(s, 15) >> 6);
+        long s7 = 0x1FFFFF & (load_3(s, 18) >> 3);
+        long s8 = 0x1FFFFF & load_3(s, 21);
+        long s9 = 0x1FFFFF & (load_4(s, 23) >> 5);
+        long s10 = 0x1FFFFF & (load_3(s, 26) >> 2);
+        long s11 = 0x1FFFFF & (load_4(s, 28) >> 7);
+        long s12 = 0x1FFFFF & (load_4(s, 31) >> 4);
+        long s13 = 0x1FFFFF & (load_3(s, 34) >> 1);
+        long s14 = 0x1FFFFF & (load_4(s, 36) >> 6);
+        long s15 = 0x1FFFFF & (load_3(s, 39) >> 3);
+        long s16 = 0x1FFFFF & load_3(s, 42);
+        long s17 = 0x1FFFFF & (load_4(s, 44) >> 5);
+        long s18 = 0x1FFFFF & (load_3(s, 47) >> 2);
+        long s19 = 0x1FFFFF & (load_4(s, 49) >> 7);
+        long s20 = 0x1FFFFF & (load_4(s, 52) >> 4);
+        long s21 = 0x1FFFFF & (load_3(s, 55) >> 1);
+        long s22 = 0x1FFFFF & (load_4(s, 57) >> 6);
        long s23 = (load_4(s, 60) >> 3);
        long carry0;
        long carry1;
@@ -57,6 +68,22 @@ public class Ed25519ScalarOps implements ScalarOps {
        long carry15;
        long carry16;

+        /**
+         * Lots of magic numbers :)
+         * To understand what's going on below, note that
+         *
+         * (1) q = 2^252 + q0 where q0 = 27742317777372353535851937790883648493.
+         * (2) s11 is the coefficient of 2^(11*21), s23 is the coefficient of 2^(23*21) and 2^252 = 2^((23-11) * 21).
+         * (3) 2^252 congruent -q0 modulo q.
+         * (4) -q0 = 666643 * 2^0 + 470296 * 2^21 + 654183 * 2^(2*21) - 997805 * 2^(3*21) + 136657 * 2^(4*21) - 683901 * 2^(5*21)
+         *
+         * Thus
+         * s23 * 2^(23*21) = s23 * 2^(12*21) * 2^(11*21) = s23 * 2^252 * 2^(11*21) congruent
+         * s23 * (666643 * 2^0 + 470296 * 2^21 + 654183 * 2^(2*21) - 997805 * 2^(3*21) + 136657 * 2^(4*21) - 683901 * 2^(5*21)) * 2^(11*21) modulo q =
+         * s23 * (666643 * 2^(11*21) + 470296 * 2^(12*21) + 654183 * 2^(13*21) - 997805 * 2^(14*21) + 136657 * 2^(15*21) - 683901 * 2^(16*21)).
+         *
+         * The same procedure is then applied for s22,...,s18.
+         */
        s11 += s23 * 666643;
        s12 += s23 * 470296;
        s13 += s23 * 654183;
@@ -111,6 +138,9 @@ public class Ed25519ScalarOps implements ScalarOps {
        // not used again
        //s18 = 0;

+        /**
+         * Time to reduce the coefficient in order not to get an overflow.
+         */
        carry6 = (s6 + (1<<20)) >> 21; s7 += carry6; s6 -= carry6 << 21;
        carry8 = (s8 + (1<<20)) >> 21; s9 += carry8; s8 -= carry8 << 21;
        carry10 = (s10 + (1<<20)) >> 21; s11 += carry10; s10 -= carry10 << 21;
@@ -124,6 +154,9 @@ public class Ed25519ScalarOps implements ScalarOps {
        carry13 = (s13 + (1<<20)) >> 21; s14 += carry13; s13 -= carry13 << 21;
        carry15 = (s15 + (1<<20)) >> 21; s16 += carry15; s15 -= carry15 << 21;

+        /**
+         * Continue with above procedure.
+         */
        s5 += s17 * 666643;
        s6 += s17 * 470296;
        s7 += s17 * 654183;
@@ -178,6 +211,9 @@ public class Ed25519ScalarOps implements ScalarOps {
        // set below
        //s12 = 0;

+        /**
+         * Reduce coefficients again.
+         */
        carry0 = (s0 + (1<<20)) >> 21; s1 += carry0; s0 -= carry0 << 21;
        carry2 = (s2 + (1<<20)) >> 21; s3 += carry2; s2 -= carry2 << 21;
        carry4 = (s4 + (1<<20)) >> 21; s5 += carry4; s4 -= carry4 << 21;
@@ -216,6 +252,7 @@ public class Ed25519ScalarOps implements ScalarOps {
        //carry11 = s11 >> 21; s12 += carry11; s11 -= carry11 << 21;
        carry11 = s11 >> 21; s12 = carry11; s11 -= carry11 << 21;

+        // TODO-CR BR: Is it really needed to do it TWO times? (it doesn't hurt, just a question).
        s0 += s12 * 666643;
        s1 += s12 * 470296;
        s2 += s12 * 654183;
@@ -237,6 +274,7 @@ public class Ed25519ScalarOps implements ScalarOps {
        carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 << 21;
        carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 << 21;

+        // s0, ..., s11 got 21 bits each.
        byte[] result = new byte[32];
        result[0] = (byte) s0;
        result[1] = (byte) (s0 >> 8);
@@ -275,51 +313,54 @@ public class Ed25519ScalarOps implements ScalarOps {


    /**
-     * Input:<br>
-     *   a[0]+256*a[1]+...+256^31*a[31] = a<br>
-     *   b[0]+256*b[1]+...+256^31*b[31] = b<br>
-     *   c[0]+256*c[1]+...+256^31*c[31] = c<br><br>
-     *
-     * Output:<br>
-     *   result[0]+256*result[1]+...+256^31*result[31] = (ab+c) mod l<br>
-     *   where l = 2^252 + 27742317777372353535851937790883648493.
+     * Input:
+     * <p><ul>
+     * <li>a[0]+256*a[1]+...+256^31*a[31] = a
+     * <li>b[0]+256*b[1]+...+256^31*b[31] = b
+     * <li>c[0]+256*c[1]+...+256^31*c[31] = c
+     * </ul><p>
+     * Output:
+     *   result[0]+256*result[1]+...+256^31*result[31] = (ab+c) mod q
+     *   where q = 2^252 + 27742317777372353535851937790883648493.
+     * <p>
+     * See the comments in {@link #reduce(byte[])} for an explanation of the algorithm.
     */
    public byte[] multiplyAndAdd(byte[] a, byte[] b, byte[] c) {
-        long a0 = 2097151 & load_3(a, 0);
-        long a1 = 2097151 & (load_4(a, 2) >> 5);
-        long a2 = 2097151 & (load_3(a, 5) >> 2);
-        long a3 = 2097151 & (load_4(a, 7) >> 7);
-        long a4 = 2097151 & (load_4(a, 10) >> 4);
-        long a5 = 2097151 & (load_3(a, 13) >> 1);
-        long a6 = 2097151 & (load_4(a, 15) >> 6);
-        long a7 = 2097151 & (load_3(a, 18) >> 3);
-        long a8 = 2097151 & load_3(a, 21);
-        long a9 = 2097151 & (load_4(a, 23) >> 5);
-        long a10 = 2097151 & (load_3(a, 26) >> 2);
+        long a0 = 0x1FFFFF & load_3(a, 0);
+        long a1 = 0x1FFFFF & (load_4(a, 2) >> 5);
+        long a2 = 0x1FFFFF & (load_3(a, 5) >> 2);
+        long a3 = 0x1FFFFF & (load_4(a, 7) >> 7);
+        long a4 = 0x1FFFFF & (load_4(a, 10) >> 4);
+        long a5 = 0x1FFFFF & (load_3(a, 13) >> 1);
+        long a6 = 0x1FFFFF & (load_4(a, 15) >> 6);
+        long a7 = 0x1FFFFF & (load_3(a, 18) >> 3);
+        long a8 = 0x1FFFFF & load_3(a, 21);
+        long a9 = 0x1FFFFF & (load_4(a, 23) >> 5);
+        long a10 = 0x1FFFFF & (load_3(a, 26) >> 2);
        long a11 = (load_4(a, 28) >> 7);
-        long b0 = 2097151 & load_3(b, 0);
-        long b1 = 2097151 & (load_4(b, 2) >> 5);
-        long b2 = 2097151 & (load_3(b, 5) >> 2);
-        long b3 = 2097151 & (load_4(b, 7) >> 7);
-        long b4 = 2097151 & (load_4(b, 10) >> 4);
-        long b5 = 2097151 & (load_3(b, 13) >> 1);
-        long b6 = 2097151 & (load_4(b, 15) >> 6);
-        long b7 = 2097151 & (load_3(b, 18) >> 3);
-        long b8 = 2097151 & load_3(b, 21);
-        long b9 = 2097151 & (load_4(b, 23) >> 5);
-        long b10 = 2097151 & (load_3(b, 26) >> 2);
+        long b0 = 0x1FFFFF & load_3(b, 0);
+        long b1 = 0x1FFFFF & (load_4(b, 2) >> 5);
+        long b2 = 0x1FFFFF & (load_3(b, 5) >> 2);
+        long b3 = 0x1FFFFF & (load_4(b, 7) >> 7);
+        long b4 = 0x1FFFFF & (load_4(b, 10) >> 4);
+        long b5 = 0x1FFFFF & (load_3(b, 13) >> 1);
+        long b6 = 0x1FFFFF & (load_4(b, 15) >> 6);
+        long b7 = 0x1FFFFF & (load_3(b, 18) >> 3);
+        long b8 = 0x1FFFFF & load_3(b, 21);
+        long b9 = 0x1FFFFF & (load_4(b, 23) >> 5);
+        long b10 = 0x1FFFFF & (load_3(b, 26) >> 2);
        long b11 = (load_4(b, 28) >> 7);
-        long c0 = 2097151 & load_3(c, 0);
-        long c1 = 2097151 & (load_4(c, 2) >> 5);
-        long c2 = 2097151 & (load_3(c, 5) >> 2);
-        long c3 = 2097151 & (load_4(c, 7) >> 7);
-        long c4 = 2097151 & (load_4(c, 10) >> 4);
-        long c5 = 2097151 & (load_3(c, 13) >> 1);
-        long c6 = 2097151 & (load_4(c, 15) >> 6);
-        long c7 = 2097151 & (load_3(c, 18) >> 3);
-        long c8 = 2097151 & load_3(c, 21);
-        long c9 = 2097151 & (load_4(c, 23) >> 5);
-        long c10 = 2097151 & (load_3(c, 26) >> 2);
+        long c0 = 0x1FFFFF & load_3(c, 0);
+        long c1 = 0x1FFFFF & (load_4(c, 2) >> 5);
+        long c2 = 0x1FFFFF & (load_3(c, 5) >> 2);
+        long c3 = 0x1FFFFF & (load_4(c, 7) >> 7);
+        long c4 = 0x1FFFFF & (load_4(c, 10) >> 4);
+        long c5 = 0x1FFFFF & (load_3(c, 13) >> 1);
+        long c6 = 0x1FFFFF & (load_4(c, 15) >> 6);
+        long c7 = 0x1FFFFF & (load_3(c, 18) >> 3);
+        long c8 = 0x1FFFFF & load_3(c, 21);
+        long c9 = 0x1FFFFF & (load_4(c, 23) >> 5);
+        long c10 = 0x1FFFFF & (load_3(c, 26) >> 2);
        long c11 = (load_4(c, 28) >> 7);
        long s0;
        long s1;