- getFramedAveragePeerClockSkew() now returns a long (ms);
        was a Long (s)
      - Implement NTP-style clock slewing so the clock is adjusted
        gradually
      - Implement clock strata so we prefer better clocks
      - Implement a timestamper in the transport so we will periodically
        update the clock even if NTP is not working
        This allows the router to converge the clock instead of simply
        hoping the first connected peer is correct.
      - Slow down NTP attempts after several consecutive failures
This commit is contained in:
zzz
2010-02-13 01:20:23 +00:00
parent 375118fe02
commit 4f70a7d0fe
7 changed files with 240 additions and 45 deletions

View File

@@ -49,7 +49,7 @@ public abstract class CommSystemFacade implements Service {
* Return framed average clock skew of connected peers in seconds, or null if we cannot answer.
* CommSystemFacadeImpl overrides this.
*/
public Long getFramedAveragePeerClockSkew(int percentToInclude) { return null; }
public long getFramedAveragePeerClockSkew(int percentToInclude) { return 0; }
/**
* Determine under what conditions we are remotely reachable.

View File

@@ -15,11 +15,31 @@ import net.i2p.util.Log;
*/
public class RouterClock extends Clock {
/**
* How often we will slew the clock
* i.e. ppm = 1000000/MAX_SLEW
* We should be able to slew really fast,
* this is probably a lot faster than what NTP does
* 1/50 is 12s in a 10m tunnel lifetime, that should be fine.
* All of this is @since 0.7.12
*/
private static final long MAX_SLEW = 50;
private static final int DEFAULT_STRATUM = 8;
private static final int WORST_STRATUM = 16;
/** the max NTP Timestamper delay is 30m right now, make this longer than that */
private static final long MIN_DELAY_FOR_WORSE_STRATUM = 45*60*1000;
private volatile long _desiredOffset;
private volatile long _lastSlewed;
/** use system time for this */
private long _lastChanged;
private int _lastStratum;
RouterContext _contextRC; // LINT field hides another field
public RouterClock(RouterContext context) {
super(context);
_contextRC = context;
_lastStratum = WORST_STRATUM;
}
/**
@@ -29,6 +49,16 @@ public class RouterClock extends Clock {
*/
@Override
public void setOffset(long offsetMs, boolean force) {
setOffset(offsetMs, force, DEFAULT_STRATUM);
}
/** @since 0.7.12 */
private void setOffset(long offsetMs, int stratum) {
setOffset(offsetMs, false, stratum);
}
/** @since 0.7.12 */
private void setOffset(long offsetMs, boolean force, int stratum) {
long delta = offsetMs - _offset;
if (!force) {
if ((offsetMs > MAX_OFFSET) || (offsetMs < 0 - MAX_OFFSET)) {
@@ -45,59 +75,127 @@ public class RouterClock extends Clock {
}
}
if ((delta < MIN_OFFSET_CHANGE) && (delta > 0 - MIN_OFFSET_CHANGE)) {
getLog().debug("Not changing offset since it is only " + delta + "ms");
// let's be perfect
if (delta == 0) {
getLog().debug("Not changing offset, delta=0");
_alreadyChanged = true;
return;
}
// only listen to a worse stratum if it's been a while
if (_alreadyChanged && stratum > _lastStratum &&
System.currentTimeMillis() - _lastChanged < MIN_DELAY_FOR_WORSE_STRATUM) {
getLog().warn("Ignoring update from a stratum " + stratum +
" clock, we recently had an update from a stratum " + _lastStratum + " clock");
return;
}
// If so configured, check sanity of proposed clock offset
if (Boolean.valueOf(_contextRC.getProperty("router.clockOffsetSanityCheck","true")).booleanValue() &&
_alreadyChanged) {
// Try calculating peer clock skew
Long peerClockSkew = _contextRC.commSystem().getFramedAveragePeerClockSkew(50);
if (peerClockSkew != null) {
long currentPeerClockSkew = _contextRC.commSystem().getFramedAveragePeerClockSkew(50);
// Predict the effect of applying the proposed clock offset
long currentPeerClockSkew = peerClockSkew.longValue();
long predictedPeerClockSkew = currentPeerClockSkew + (delta / 1000l);
long predictedPeerClockSkew = currentPeerClockSkew + delta;
// Fail sanity check if applying the offset would increase peer clock skew
if ((Math.abs(predictedPeerClockSkew) > (Math.abs(currentPeerClockSkew) + 5)) ||
(Math.abs(predictedPeerClockSkew) > 20)) {
if ((Math.abs(predictedPeerClockSkew) > (Math.abs(currentPeerClockSkew) + 5*1000)) ||
(Math.abs(predictedPeerClockSkew) > 20*1000)) {
getLog().error("Ignoring clock offset " + offsetMs + "ms (current " + _offset +
"ms) since it would increase peer clock skew from " + currentPeerClockSkew +
"s to " + predictedPeerClockSkew + "s. Broken server in pool.ntp.org?");
"ms to " + predictedPeerClockSkew + "ms. Bad time server?");
return;
} else {
getLog().debug("Approving clock offset " + offsetMs + "ms (current " + _offset +
"ms) since it would decrease peer clock skew from " + currentPeerClockSkew +
"s to " + predictedPeerClockSkew + "s.");
"ms to " + predictedPeerClockSkew + "ms.");
}
}
} // check sanity
}
if (_alreadyChanged) {
// Update the target offset, slewing will take care of the rest
if (delta > 15*1000)
getLog().error("Warning - Updating clock offset to " + offsetMs + "ms from " + _offset + "ms");
getLog().error("Warning - Updating target clock offset to " + offsetMs + "ms from " + _offset + "ms, Stratum " + stratum);
else if (getLog().shouldLog(Log.INFO))
getLog().info("Updating clock offset to " + offsetMs + "ms from " + _offset + "ms");
getLog().info("Updating target clock offset to " + offsetMs + "ms from " + _offset + "ms, Stratum " + stratum);
if (!_statCreated) {
_contextRC.statManager().createRateStat("clock.skew", "How far is the already adjusted clock being skewed?", "Clock", new long[] { 10*60*1000, 3*60*60*1000, 24*60*60*60 });
_statCreated = true;
}
_contextRC.statManager().addRateData("clock.skew", delta, 0);
_desiredOffset = offsetMs;
} else {
getLog().log(Log.INFO, "Initializing clock offset to " + offsetMs + "ms from " + _offset + "ms");
getLog().log(Log.INFO, "Initializing clock offset to " + offsetMs + "ms, Stratum " + stratum);
_alreadyChanged = true;
_offset = offsetMs;
_desiredOffset = offsetMs;
// this is used by the JobQueue
fireOffsetChanged(delta);
}
_alreadyChanged = true;
_offset = offsetMs;
fireOffsetChanged(delta);
_lastChanged = System.currentTimeMillis();
_lastStratum = stratum;
}
/**
* @param stratum used to determine whether we should ignore
* @since 0.7.12
*/
@Override
public void setNow(long realTime, int stratum) {
long diff = realTime - System.currentTimeMillis();
setOffset(diff, stratum);
}
/**
* Retrieve the current time synchronized with whatever reference clock is in use.
* Do really simple clock slewing, like NTP but without jitter prevention.
* Slew the clock toward the desired offset, but only up to a maximum slew rate,
* and never let the clock go backwards because of slewing.
*
* Take care to only access the volatile variables once for speed and to
* avoid having another thread change them
*
* This is called about a zillion times a second, so we can do the slewing right
* here rather than in some separate thread to keep it simple.
* Avoiding backwards clocks when updating in a thread would be hard too.
*/
@Override
public long now() {
long systemNow = System.currentTimeMillis();
// copy the global, so two threads don't both increment or decrement _offset
long offset = _offset;
if (systemNow >= _lastSlewed + MAX_SLEW) {
// copy the global
long desiredOffset = _desiredOffset;
if (desiredOffset > offset) {
// slew forward
_offset = ++offset;
_lastSlewed = systemNow;
} else if (desiredOffset < offset) {
// slew backward, but don't let the clock go backward
// this should be the first call since systemNow
// was greater than lastSled + MAX_SLEW, i.e. different
// from the last systemNow, thus we won't let the clock go backward,
// no need to track when we were last called.
_offset = --offset;
_lastSlewed = systemNow;
}
}
return offset + systemNow;
}
/*
* How far we still have to slew, for diagnostics
* @since 0.7.12
*/
public long getDeltaOffset() {
return _desiredOffset - _offset;
}
}

View File

@@ -51,6 +51,7 @@ public class CommSystemFacadeImpl extends CommSystemFacade {
_log.info("Starting up the comm system");
_manager = new TransportManager(_context);
_manager.startListening();
startTimestamper();
}
public void shutdown() {
@@ -78,20 +79,20 @@ public class CommSystemFacadeImpl extends CommSystemFacade {
/**
* @param percentToInclude 1-100
* @return Framed average clock skew of connected peers in seconds, or the clock offset if we cannot answer.
* @return Framed average clock skew of connected peers in milliseconds, or the clock offset if we cannot answer.
* Average is calculated over the middle "percentToInclude" peers.
* Todo: change Vectors to milliseconds
*/
@Override
public Long getFramedAveragePeerClockSkew(int percentToInclude) {
public long getFramedAveragePeerClockSkew(int percentToInclude) {
if (_manager == null) {
// round toward zero
return Long.valueOf(_context.clock().getOffset() / 1000);
return _context.clock().getOffset();
}
Vector skews = _manager.getClockSkews();
if (skews == null ||
skews.size() <= 0 ||
(skews.size() < 5 && _context.clock().getUpdatedSuccessfully())) {
return Long.valueOf(_context.clock().getOffset() / 1000);
return _context.clock().getOffset();
}
// Going to calculate, sort them
@@ -106,12 +107,12 @@ public class CommSystemFacadeImpl extends CommSystemFacade {
long sum = 0;
for (int i = first; i <= last; i++) {
long value = ((Long) (skews.get(i))).longValue();
if (_log.shouldLog(Log.DEBUG))
_log.debug("Adding clock skew " + i + " valued " + value + " s.");
//if (_log.shouldLog(Log.DEBUG))
// _log.debug("Adding clock skew " + i + " valued " + value + " s.");
sum = sum + value;
}
// Calculate average (round toward zero)
return Long.valueOf(sum / frameSize);
// Calculate average
return sum * 1000 / frameSize;
}
public List getBids(OutNetMessage msg) {
@@ -481,4 +482,38 @@ public class CommSystemFacadeImpl extends CommSystemFacade {
buf.append("</tt>");
return buf.toString();
}
/*
* Timestamper stuff
*
* This is used as a backup to NTP over UDP.
* @since 0.7.12
*/
private static final int TIME_START_DELAY = 5*60*1000;
private static final int TIME_REPEAT_DELAY = 10*60*1000;
/** @since 0.7.12 */
private void startTimestamper() {
SimpleScheduler.getInstance().addPeriodicEvent(new Timestamper(), TIME_START_DELAY, TIME_REPEAT_DELAY);
}
/**
* Update the clock offset based on the average of the peers.
* This uses the default stratum which is lower than any reasonable
* NTP source, so it will be ignored unless NTP is broken.
* @since 0.7.12
*/
private class Timestamper implements SimpleTimer.TimedEvent {
public void timeReached() {
// use the same % as in RouterClock so that check will never fail
// This is their our offset w.r.t. them...
long peerOffset = getFramedAveragePeerClockSkew(50);
if (peerOffset == 0)
return;
long currentOffset = _context.clock().getOffset();
// ... so we subtract it to get in sync with them
long newOffset = currentOffset - peerOffset;
_context.clock().setOffset(newOffset);
}
}
}