2010年12月27日 星期一

xauth client dpd can not work!

在ipsec tunnel建立後,若「雙方」都同意DPD,那兩邊都會送R_U_THERE並等待R_U_THERE_ACK來「確認」
對方是否存在。但在XAUTH時有個bug:只有xauthserver會送R_U_THERE,這樣的結果會變成:
只有xauthserver會detect xclient是否存在,但xclient不會知道xserver出問題…
openswan-2.6.24:dpd.c,timer.c,state.c,ikev1.c
APIs: 
event_schedule(enum event_type type, time_t tm, struct state *st):註冊事件
dpd_event(struct state *st): 在event_schedule()的EVENT_DPD timer timeout時的callback

有關DPD的event有兩個EVENT_DPD和EVENT_DPD_TIMEOUT

stf_status dpd_init(struct state *st):在ipsec tunnel建立且DPD enabled時會initialize DPD
  的計時器:在isakmp的sa建立及quick_mode(quick_inI2(),quick_inR1_outI2_cryptotail())建立時都會call一次。
dpd_outI(): 送出R_U_THERE,然後記錄DPD delay和DPD timeout時間

重要structs:
struct connection:一個tunnel profile object,記錄該參數,eg:name, policy, ike,ipsec life time etc...有關dpd的部分如下
  • time_t          dpd_delay;              /* time between checks */ 
  • time_t          dpd_timeout;            /* time after which we are dead */
  • enum dpd_action dpd_action;             /* what to do when we die */ 
struct state: state object,記錄sa的內容,狀態,及tunnel的一些細節(如雙方cookie: st_icookie,st_rcookie; nonce,xauth的username,password;雙向spi,enc的list…)每個state object會有一個connection object,有關dpd的部分如下:
  •     time_t              st_last_dpd;            /* Time of last DPD transmit */
  •     u_int32_t           st_dpd_seqno;           /* Next R_U_THERE to send */
  •     u_int32_t           st_dpd_expectseqno;     /* Next R_U_THERE_ACK to receive */
  •     u_int32_t           st_dpd_peerseqno;       /* global variables */
  •     struct event       *st_dpd_event;          /* backpointer for DPD events */
    所有state objs會放在一個global variables : statetable
    state.c:
    /* Number of slots in statetable; find_phase1_state() walks indices 0..STATE_TABLE_SIZE-1. */
    #define STATE_TABLE_SIZE 32

    /*
     * File-scope table holding every state object.  Each slot is the head of
     * a list that is traversed through the st_hashchain_next member.
     */
    static struct state *statetable[STATE_TABLE_SIZE];
struct qke_continuation:在quick mode時,會由isakmp的st複製(duplicate)出一個quick mode用的st
ikev1_continuations.h
/*
 * Continuation record used for the quick-mode key exchange.
 * quick_outI1() allocates one (allocation tag "quick_outI1 KE") and fills in
 * the st and isakmp_sa members.
 */
struct qke_continuation {
    struct pluto_crypto_req_cont qke_pcrc;      /* NOTE(review): presumably the generic crypto-request continuation header -- confirm */
    struct state                *st;            /* need to use abstract #; quick_outI1() stores the state duplicated from the ISAKMP SA here */
    struct state                *isakmp_sa;     /* used in initiator: the phase 1 state handed to quick_outI1() */
    so_serial_t                  replacing;     /* NOTE(review): presumably the serial number of the state being replaced -- confirm */
    struct msg_digest           *md;            /* used in responder */
};



ikev1_quick.c
stf_status
quick_outI1(int whack_sock
            , struct state *isakmp_sa
            , struct connection *c
            , lset_t policy
            , unsigned long try
            , so_serial_t replacing)
{
    struct state *st = duplicate_state(isakmp_sa);
    struct qke_continuation *qke = alloc_thing(struct qke_continuation
                                               , "quick_outI1 KE");
    st->st_connection = c;
....
    qke->st = st;
    qke->isakmp_sa = isakmp_sa;
...



DPD working flow:
dpd_init(struct state *st)
   \===> find_state_ikev1()
   \===>event_schedule(EVENT_DPD, st->st_connection->dpd_delay, st);

dpd_event(struct state *st): 收到EVENT_DPD後開始check:
    如果是phase1,call p1_dpd_outI1() 如果是在phase2 call p2_dpd_outI1(), 但兩者最後都會call dpd_outI()
 
我的bug是:在第一次dpd_init()註冊後,dpd_event()被呼叫時,p2_dpd_outI1()裡的find_phase1_state(p2st->st_connection, ISAKMP_SA_ESTABLISHED_STATES)找不到phase 1 state…
../../include/pluto_constants.h
/*
 * Set (lset_t bit mask) of the states that count as "ISAKMP SA established":
 * the last main-mode and aggressive-mode states of both roles.
 */
#define ISAKMP_SA_ESTABLISHED_STATES \
    (LELEM(STATE_MAIN_R3) | LELEM(STATE_MAIN_I4) | \
     LELEM(STATE_AGGR_I2) | LELEM(STATE_AGGR_R2))

state.c
/*
 * Find the newest phase 1 state that can serve connection c.
 *
 * Walks every chain of statetable and keeps the state with the highest
 * st_serialno that satisfies all of:
 *   - its st_state is a member of ok_states,
 *   - its connection has the same host_pair as c,
 *   - same_peer_ids() accepts the two connections.
 *
 * Returns the best match, or NULL when no state qualifies.
 */
struct state *
find_phase1_state(const struct connection *c, lset_t ok_states)
{
    struct state
        *st,
        *best = NULL;
    int i;

    for (i = 0; i < STATE_TABLE_SIZE; i++) {
        for (st = statetable[i]; st != NULL; st = st->st_hashchain_next) {
            /*
             * <== The LHAS() membership test on the next line is the part
             * that fails for the XAUTH client -- why?
             */
            if (LHAS(ok_states, st->st_state)
                && c->host_pair == st->st_connection->host_pair
                && same_peer_ids(c, st->st_connection, NULL)
                && (best == NULL
                    || best->st_serialno < st->st_serialno))
                {
                    /* newer than anything seen so far: remember it */
                    best = st;
                }
        }
    }

    return best;
}


include/constants.h:
/*
 * lset_t: a set of small non-negative integers kept as a bit mask in an
 * unsigned long long (element n is bit n).
 */
typedef unsigned long long lset_t;
/* The empty set. */
#define LEMPTY 0ULL
/* Singleton set containing element opt. */
#define LELEM(opt) (1ULL << (opt))
/* Set of all elements lwb..upb inclusive (element numbers). */
#define LRANGE(lwb, upb) LRANGES(LELEM(lwb), LELEM(upb))
/*
 * Set of all bits from single-bit mask first up to single-bit mask last.
 * Parameters are parenthesized so that expression arguments such as
 * "1ULL << 2" expand with the intended precedence.
 */
#define LRANGES(first, last) ((last) - (first) + (last))
/* Non-zero when element elem is a member of set. */
#define LHAS(set, elem)  ((LELEM(elem) & (set)) != LEMPTY)


/*
 * States accepted as an established ISAKMP (phase 1) SA, as an lset_t mask.
 * With the state_kind numbering these are elements 9, 10, 14 and 15,
 * i.e. the mask 0xC600.
 */
#define ISAKMP_SA_ESTABLISHED_STATES \
    (  LELEM(STATE_MAIN_R3) \
     | LELEM(STATE_MAIN_I4) \
     | LELEM(STATE_AGGR_I2) \
     | LELEM(STATE_AGGR_R2))

enum state_kind {
    STATE_UNDEFINED=0,  /* 0 -- most likely accident */

    /*  Opportunism states: see "Opportunistic Encryption" 2.2 */

    OPPO_ACQUIRE,       /* got an ACQUIRE message for this pair */
    OPPO_GW_DISCOVERED, /* got TXT specifying gateway */

    /* IKE states */

    STATE_MAIN_R0,
    STATE_MAIN_I1,
    STATE_MAIN_R1,
    STATE_MAIN_I2,
    STATE_MAIN_R2,
    STATE_MAIN_I3,
    STATE_MAIN_R3,
    STATE_MAIN_I4,

    STATE_AGGR_R0,
    STATE_AGGR_I1,
    STATE_AGGR_R1,
    STATE_AGGR_I2,
    STATE_AGGR_R2,

    STATE_QUICK_R0,
    STATE_QUICK_I1,
    STATE_QUICK_R1,
    STATE_QUICK_I2,
    STATE_QUICK_R2,

    STATE_INFO,
    STATE_INFO_PROTECTED,


    STATE_XAUTH_R0,    /* server state has sent request, awaiting reply */
    STATE_XAUTH_R1,    /* server state has sent success/fail, awaiting reply */
    STATE_MODE_CFG_R0,           /* these states are used on the responder */
    STATE_MODE_CFG_R1,
    STATE_MODE_CFG_R2,

    STATE_MODE_CFG_I1,           /* this is used on the initiator */

    STATE_XAUTH_I0,              /* client state is awaiting request */
    STATE_XAUTH_I1,              /* client state is awaiting result code */

先將find_phase1_state()加上一些messages
    for (i = 0; i < STATE_TABLE_SIZE; i++) {
        loglog(RC_LOG_SERIOUS, "%s:%d,i: %d\n", __FUNCTION__, __LINE__, i);
        for (st = statetable[i]; st != NULL; st = st->st_hashchain_next) {
            loglog(RC_LOG_SERIOUS, "%s:%d,a [%llX:%llX]LHAS: %s, host_pair: %s, "
                        "same_peer_ids: %s\n",
                        __FUNCTION__, __LINE__,
                        (unsigned long long)ok_states,
                        (unsigned long long)st->st_state,
                    LHAS(ok_states, st->st_state)?"Ture":"Fail",
                    c->host_pair == st->st_connection->host_pair?"Ture":"Fail",
                    same_peer_ids(c, st->st_connection, NULL)?"True":"Fail");
......
在正常沒有XAUTH時
以下log中「[C600:A]」的0xC600就是「ISAKMP_SA_ESTABLISHED_STATES」,而0xA(10)對應C600h的bit 10,所以LHAS為true,也就是state落在STATE_MAIN_I4。

"conn_xauthclient" #4: find_phase1_state:1195,a [C600:13]LHAS: Fail, host_pair: Ture, same_peer_ids: True
"conn_xauthclient" #4: find_phase1_state:1195,a [C600:A]LHAS: Ture, host_pair: Ture, same_peer_ids: True

然而:xauth client會是其它的value:1E.
"conn_xauthclient" #2: find_phase1_state:1195,a [C600:13]LHAS: Fail, host_pair: Ture, same_peer_ids: True
"conn_xauthclient" #2: find_phase1_state:1195,a [C600:1E]LHAS: Fail, host_pair: Ture, same_peer_ids: True


修改此macro後…似乎就可以了,testing中…。另一個小發現是:如果對方在時間內先送了R_U_THERE,該週期就不再送R_U_THERE了
/*
 * Patched set of "ISAKMP SA established" states: in addition to the final
 * main-mode and aggressive-mode states it accepts STATE_XAUTH_I1, the state
 * (0x1E) in which the XAUTH client's phase 1 SA was observed.
 */
#define ISAKMP_SA_ESTABLISHED_STATES \
    (LELEM(STATE_MAIN_R3) | LELEM(STATE_MAIN_I4) | \
     LELEM(STATE_AGGR_I2) | LELEM(STATE_AGGR_R2) | \
     LELEM(STATE_XAUTH_I1))
===================================================

RFC3706

1. Introduction

These schemes tend to be unidirectional (a HELLO only)
   or bidirectional (a HELLO/ACK pair).  For the purpose of this
   document, the term "heartbeat" will refer to a unidirectional message
   to prove liveliness.  Likewise, the term "keepalive" will refer to a
   bidirectional message.

Keepalives vs. Heartbeats

A sends HELLO to B; B sends ACK to A.
It is conceivable in such a scheme that peer B would
   never be interested in peer A's liveliness
(A在意B,但B不care A;仍屬bidirectional)

Heartbeats:

只送HELLO來證明自已存在
unidirectional
適用在大量的remote users or大量sessions.


set --debug-dpd to pluto for debug
------------------------------------------------

這是使用pre-shared key的client的情況,由initiator發R_U_THERE

"conn_conn_ipsec" #1: Main mode peer ID is ID_IPV4_ADDR: '172.21.46.139'
"conn_conn_ipsec" #1: transition from state STATE_MAIN_I3 to state STATE_MAIN_I4
"conn_conn_ipsec" #1: STATE_MAIN_I4: ISAKMP SA established {auth=OAKLEY_PRESHARE                                             D_KEY cipher=oakley_3des_cbc_192 prf=oakley_md5 group=modp1024}
"conn_conn_ipsec" #1: Dead Peer Detection (RFC 3706): enabled
| state: 1 requesting event none to be deleted by /home/samba/ubicom-distro/uCli                                             nux/openswan-2.6.24rc4/programs/pluto/dpd.c:159
| unpending state #1
| processing connection conn_conn_ipsec
"conn_conn_ipsec" #2: initiating Quick Mode PSK+ENCRYPT+TUNNEL+PFS {using isakmp                                             #1 msgid:5e3a8c6c proposal=3DES(3)_192-MD5(1)_128, AES(12)_128-MD5(1)_128, AES(1                                             2)_128-MD5(1)_128, AES(12)_192-MD5(1)_128 pfsgroup=OAKLEY_GROUP_MODP1024}
| processing connection conn_conn_ipsec
| removing pending policy for "none" {0x429728a4}
| processing connection conn_conn_ipsec
| processing connection conn_conn_ipsec
"conn_conn_ipsec" #2: Dead Peer Detection (RFC 3706): enabled
| state: 2 requesting event none to be deleted by /home/samba/ubicom-distro/uCli                                             nux/openswan-2.6.24rc4/programs/pluto/dpd.c:159
| state: 1 requesting event EVENT_DPD to be deleted by /home/samba/ubicom-distro                                             /uClinux/openswan-2.6.24rc4/programs/pluto/dpd.c:171
"conn_conn_ipsec" #2: transition from state STATE_QUICK_I1 to state STATE_QUICK_                                             I2
"conn_conn_ipsec" #2: STATE_QUICK_I2: sent QI2, IPsec SA established tunnel mode                                              {ESP=>0x2df79419 <0x56c3adf7 xfrm=3DES_0-HMAC_MD5 NATOA=none NATD=none DPD=enab                                             led}

     #####事實上,responder和initiator都會送R_U_THERE...#######

/ # | processing connection conn_IPSec
| received R_U_THERE seq:2568 time:1291695452 (state=#1 name="conn_IPSec")
| processing connection conn_IPSec
| processing dpd for state #3 ("conn_IPSec")
| not yet time for dpd event: 1291695452 < 1291695482

/ #
/ # | processing connection conn_IPSec
| processing dpd for state #3 ("conn_IPSec")
| scheduling timeout to 120
| state: 1 requesting event none to be deleted by /home/samba/ubicom-distro/uClinux/openswan-2.6.24rc4/programs/pluto/dpd.c:192
| sending R_U_THERE 29742 to 192.168.3.100:500 (state #1)
| processing connection conn_IPSec
| received R_U_THERE seq:2569 time:1291695482 (state=#1 name="conn_IPSec")
| state: 1 requesting event EVENT_DPD_TIMEOUT to be deleted by /home/samba/ubicom-distro/uClinux/openswan-2.6.24rc4/programs/pluto/dpd.c:437
| processing connection conn_IPSec
| R_U_THERE_ACK, seqno received: 29742 expected: 29742 (state=#1)dpd_inR()


-----------------------------------------------------------------------------------



 # | processing connection conn_IPSec
| received R_U_THERE seq:2589 time:1291696322 (state=#1 name="conn_IPSec")
| processing connection conn_IPSec
| processing dpd for state #3 ("conn_IPSec")
| not yet time for dpd event: 1291696322 < 1291696352


/ # | processing connection conn_IPSec
| processing dpd for state #3 ("conn_IPSec")
| scheduling timeout to 120
| state: 1 requesting event none to be deleted by /home/samba/ubicom-distro/uClinux/openswan-2.6.24rc4/programs/pluto/dpd.c:192
| sending R_U_THERE 29764 to 192.168.3.100:500 (state #1)
| processing connection conn_IPSec
| R_U_THERE_ACK, seqno received: 29764 expected: 29764 (state=#1)
| state: 1 requesting event EVENT_DPD_TIMEOUT to be deleted by /home/samba/ubicom-distro/uClinux/openswan-2.6.24rc4/programs/pluto/dpd.c:514





---------------------------------------------------
---------------------------------------------------
但xauth的方向是相反的,xauthserver送R_U_THERE, xauthclient回R_U_THERE_ACK


"conn_conn_ipsec" #1: Dead Peer Detection (RFC 3706): enabled
| state: 1 requesting event none to be deleted by /home/samba/ubicom-distro/uClinux/openswan-2.6.24rc4/programs/pluto/dpd.c:159
| processing connection conn_conn_ipsec
"conn_conn_ipsec" #1: =========xclient_account(conn_conn_ipsec)
"conn_conn_ipsec" #1: GET DB:[peter,1234 ] FORM [conn_conn_ipsec]
"conn_conn_ipsec" #1: user:peter, pass:1234
"conn_conn_ipsec" #1: XAUTH: Answering XAUTH challenge with user='peter'
"conn_conn_ipsec" #1: transition from state STATE_XAUTH_I0 to state STATE_XAUTH_I1
"conn_conn_ipsec" #1: STATE_XAUTH_I1: XAUTH client - awaiting CFG_set
"conn_conn_ipsec" #1: Dead Peer Detection (RFC 3706): enabled
| processing connection conn_conn_ipsec
"conn_conn_ipsec" #1: XAUTH: Successfully Authenticated
"conn_conn_ipsec" #1: transition from state STATE_XAUTH_I0 to state STATE_XAUTH_I1
"conn_conn_ipsec" #1: STATE_XAUTH_I1: XAUTH client - awaiting CFG_set
"conn_conn_ipsec" #1: Dead Peer Detection (RFC 3706): enabled
| unpending state #1
| processing connection conn_conn_ipsec
"conn_conn_ipsec" #2: initiating Quick Mode PSK+ENCRYPT+TUNNEL+PFS {using isakmp#1 msgid:9227c94a proposal=3DES(3)_192-MD5(1)_128, AES(12)_128-MD5(1)_128, AES(12)_128-MD5(1)_128, AES(12)_192-MD5(1)_128 pfsgroup=OAKLEY_GROUP_MODP1024}
| processing connection conn_conn_ipsec
| removing pending policy for "none" {0x4105d324}
| processing connection conn_conn_ipsec
| processing connection conn_conn_ipsec

/ # "conn_conn_ipsec" #2: Dead Peer Detection (RFC 3706): enabled
| state: 2 requesting event none to be deleted by /home/samba/ubicom-distro/uClinux/openswan-2.6.24rc4/programs/pluto/dpd.c:159
| state: 1 requesting event EVENT_DPD to be deleted by /home/samba/ubicom-distro/uClinux/openswan-2.6.24rc4/programs/pluto/dpd.c:171
"conn_conn_ipsec" #2: transition from state STATE_QUICK_I1 to state STATE_QUICK_I2
"conn_conn_ipsec" #2: STATE_QUICK_I2: sent QI2, IPsec SA established tunnel mode {ESP=>0xe7bb4694 <0xc780201b xfrm=3DES_0-HMAC_MD5 NATOA=none NATD=none DPD=enabled}

/ #
/ # | processing connection conn_conn_ipsec
| received R_U_THERE seq:16359 time:1292989489 (state=#1 name="conn_conn_ipsec")
| processing connection conn_conn_ipsec
"conn_conn_ipsec" #2: DPD Error: could not find newest phase 1 state

trace一下「find_phase1_state」
struct state *
find_phase1_state(const struct connection *c, lset_t ok_states)

"conn_conn_ipsec" #2: find_phase1_state:1193,i: 13
"conn_conn_ipsec" #2: find_phase1_state:1195, LHAS: Fail, host_pair: Ture, same_peer_ids: True
"conn_conn_ipsec" #2: find_phase1_state:1195, LHAS: Fail, host_pair: Ture, same_peer_ids: True
"conn_conn_ipsec" #2: find_phase1_state:1193,i: 14

/*
 * Instrumented version of find_phase1_state(): same search as the plain
 * function (newest state whose st_state is in ok_states, whose connection
 * shares c's host_pair and passes same_peer_ids()), with a log line for
 * every table slot, every state examined and every new best match.
 *
 * Returns the best match, or NULL when no state qualifies.
 */
struct state *
find_phase1_state(const struct connection *c, lset_t ok_states)
{
    struct state
        *st,
        *best = NULL;
    int i;

    loglog(RC_LOG_SERIOUS, "%s:%d, STATE_TABLE_SIZE=:%d\n",
                 __FUNCTION__, __LINE__, STATE_TABLE_SIZE);

    for (i = 0; i < STATE_TABLE_SIZE; i++) {
        loglog(RC_LOG_SERIOUS, "%s:%d,i: %d\n", __FUNCTION__, __LINE__, i);
        for (st = statetable[i]; st != NULL; st = st->st_hashchain_next) {
            /*
             * ok_states is an lset_t (unsigned long long) and st_state is an
             * enum.  Printing them with "%X" is a format/argument mismatch:
             * it is undefined behaviour and can misalign every following
             * argument, yielding garbage such as "[C600:1341485BC0]" and
             * unprintable bytes for the last "%s".  Use "%llX" and cast both
             * values to unsigned long long.
             */
            loglog(RC_LOG_SERIOUS, "%s:%d, [%llX:%llX]LHAS: %s, host_pair: %s, same_peer_ids: %s\n", __FUNCTION__, __LINE__,
                        (unsigned long long)ok_states,
                        (unsigned long long)st->st_state,
                    LHAS(ok_states, st->st_state)?"Ture":"Fail",
                    c->host_pair == st->st_connection->host_pair?"Ture":"Fail",
                    same_peer_ids(c, st->st_connection, NULL)?"True":"Fail");

            if (LHAS(ok_states, st->st_state)
                && c->host_pair == st->st_connection->host_pair
                && same_peer_ids(c, st->st_connection, NULL)
                && (best == NULL
                    || best->st_serialno < st->st_serialno))
                {
                    /* newer than anything seen so far: log and remember it */
                    loglog(RC_LOG_SERIOUS, "%s:%d\n", __FUNCTION__, __LINE__);
                    best = st;
                }
        }
    }

    return best;
}



-------------------------------------
"conn_conn_ipsec" #2: find_phase1_state:1195, [C600:1341485BC0]LHAS: Ture, host_pair: True, same_peer_ids: \001\003d\343\220
"conn_conn_ipsec" #2: find_phase1_state:1195, [C600:1E41485BC0]LHAS: Ture, host_pair: True, same_peer_ids: \001\003d\343\220

沒有留言: