diff -r 9e9120e15a20 .hgtags
--- a/.hgtags	Mon Mar 03 13:05:56 2008 -0600
+++ b/.hgtags	Thu Mar 13 13:02:11 2008 -0500
@@ -8,6 +8,7 @@ 12e748600d0d4a6198c62612f639a1ff8f593c07
 12e748600d0d4a6198c62612f639a1ff8f593c07 pvfs2-1-5-point
 185b783bda1609681ddf9b7ce7e89443d3ba51a0 pvfs2-0-5-0
 208523d3b47bd8150ea662d50fadf2b4b67bf5aa pvfs2-1-0-1
+252809de84f5a4a06e932fa494b0a6739eccc09b small-file-branch-point
 2660420287b4e22f0f5c260664788ce6c47c3680 pvfs2-1-5-1-rc2
 26d5a275df88b4b6a60d37c1a4b4a2d92083bd63 pvfs-2-7-branch-point
 2e5a1624e6432abfd7bdb0d8d4d2b838524ed587 pvfs-2-6-point3
diff -r 9e9120e15a20 src/apps/kernel/linux/pvfs2-client-core.c
--- a/src/apps/kernel/linux/pvfs2-client-core.c	Mon Mar 03 13:05:56 2008 -0600
+++ b/src/apps/kernel/linux/pvfs2-client-core.c	Thu Mar 13 13:02:11 2008 -0500
@@ -1075,6 +1075,8 @@ failed:
     if(ret < 0)
     {
         PVFS_perror_gossip("Posting fs_add failed", ret);
+        gossip_err("  Tried to mount host %s\n",
+                vfs_request->in_upcall.req.fs_mount.pvfs2_config_server);
     }
 
     return ret;
diff -r 9e9120e15a20 src/io/bmi/bmi_ib/ib.c
--- a/src/io/bmi/bmi_ib/ib.c	Mon Mar 03 13:05:56 2008 -0600
+++ b/src/io/bmi/bmi_ib/ib.c	Thu Mar 13 13:02:11 2008 -0500
@@ -85,8 +85,6 @@ static int ib_tcp_client_connect(ib_meth
 static int ib_tcp_client_connect(ib_method_addr_t *ibmap,
                                  struct bmi_method_addr *remote_map);
 #endif
-static int ib_tcp_server_check_new_connections(void);
-static int ib_block_for_activity(int timeout_ms);
 
 /*
  * Return string form of work completion opcode field.
@@ -109,7 +107,7 @@ static const char *wc_opcode_string(int 
  * walk the incomingq looking for things to do to them.  Returns
  * number of new things that arrived.
  */
-static int ib_check_cq(void)
+int ib_check_cq(void)
 {
     int ret = 0;
 
@@ -1444,7 +1442,7 @@ BMI_ib_close_context(bmi_context_id cont
  * Can't just call test since we don't want to reap the operation,
  * just make sure it's done or not.
  */
-static int
+int
 BMI_ib_cancel(bmi_op_id_t id, bmi_context_id context_id __unused)
 {
     struct method_op *mop;
@@ -1535,6 +1533,7 @@ BMI_ib_rev_lookup(struct bmi_method_addr
     else
 	return ibmap->c->peername;
 }
+
 
 /*
  * Build and fill an IB-specific method_addr structure.
@@ -1851,7 +1850,7 @@ out_unlock:
  * testunexpected will pick up new connections.  Returns ==1 if IB device is
  * ready, other >0 for some activity, else 0.
  */
-static int ib_block_for_activity(int timeout_ms)
+int ib_block_for_activity(int timeout_ms)
 {
     struct pollfd pfd[3];  /* cq fd, async fd, accept socket */
     int numfd;
@@ -1939,9 +1938,17 @@ static int BMI_ib_set_info(int option, v
     switch (option) {
     case BMI_DROP_ADDR: {
 	struct bmi_method_addr *map = param;
-	ib_method_addr_t *ibmap = map->method_data;
-	free(ibmap->hostname);
-	free(map);
+	if (map){
+	    ib_method_addr_t *ibmap = map->method_data;
+	    if (ibmap->hostname){
+	    	debug(0, "%s freeing ibmap->hostname %s", __func__, ibmap->hostname);
+	    	free(ibmap->hostname);
+		ibmap->hostname = NULL;
+	    }
+	    debug(0, "%s freeing map %p type %d data %p", 
+	    		__func__, map, map->method_type, map->method_data);
+	    free(map);
+	}
 	break;
     }
     case BMI_OPTIMISTIC_BUFFER_REG: {
diff -r 9e9120e15a20 src/io/bmi/bmi_ib/ib.h
--- a/src/io/bmi/bmi_ib/ib.h	Mon Mar 03 13:05:56 2008 -0600
+++ b/src/io/bmi/bmi_ib/ib.h	Thu Mar 13 13:02:11 2008 -0500
@@ -25,6 +25,9 @@
 /* 20 8kB buffers allocated to each connection for unexpected messages */
 #define DEFAULT_EAGER_BUF_NUM  (20)
 #define DEFAULT_EAGER_BUF_SIZE (8 << 10)
+
+int ib_check_cq(void);
+int ib_block_for_activity(int timeout_ms);
 
 struct buf_head;
 
@@ -55,6 +58,7 @@ typedef struct {
 
     int send_credit;    /* free slots on receiver */
     int return_credit;  /* receive buffers he filled but that we've emptied */
+    int wr_credit;	/* make sure we don't overflow available wr in hca */
 
     void *priv;
 
@@ -454,7 +458,7 @@ void memcache_cache_flush(void *md);
  * Debugging macros.
  */
 #if 1
-#define DEBUG_LEVEL 2
+#define DEBUG_LEVEL 1
 #define debug(lvl,fmt,args...) \
     do { \
 	if (lvl <= DEBUG_LEVEL) \
diff -r 9e9120e15a20 src/io/bmi/bmi_ib/openib.c
--- a/src/io/bmi/bmi_ib/openib.c	Mon Mar 03 13:05:56 2008 -0600
+++ b/src/io/bmi/bmi_ib/openib.c	Thu Mar 13 13:02:11 2008 -0500
@@ -23,6 +23,9 @@
 
 #include "ib.h"
 
+#define POST_MAX_RETRIES 100
+#define POST_RETRY_BLOCKTIME 10
+
 /*
  * OpenIB-private device-wide state.
  */
@@ -38,6 +41,7 @@ struct openib_device_priv {
     int nic_max_sge;
     int nic_max_wr;
 
+    int nic_wr_credit;	/* credit used to prevent wq overflows */
     /*
      * Temp array for filling scatter/gather lists to pass to IB functions,
      * allocated once at start to max size defined as reported by the qp.
@@ -154,9 +158,15 @@ static int openib_new_connection(ib_conn
     att.qp_type = IBV_QPT_RC;
     oc->qp = ibv_create_qp(od->nic_pd, &att);
     if (!oc->qp)
-	error("%s: create QP", __func__);
+	error_errno("%s: create QP", __func__);
     VALGRIND_MAKE_MEM_DEFINED(&att, sizeof(att));
     VALGRIND_MAKE_MEM_DEFINED(&oc->qp->qp_num, sizeof(oc->qp->qp_num));
+
+    /*
+     * nic_wr_credit counts signaled RDMA writes that are posted but not
+     * yet completed.  FIXME: device-wide counter, yet reset per connection.
+     */
+    od->nic_wr_credit = 0;
 
     /* compare the caps that came back against what we already have */
     if (od->sg_max_len == 0) {
@@ -183,10 +193,10 @@ static int openib_new_connection(ib_conn
 
     /* verify we got what we asked for */
     if ((int) att.cap.max_recv_wr < num_wr)
-	error("%s: asked for %d recv WRs on QP, got %d", __func__, num_wr,
+	warning("%s: asked for %d recv WRs on QP, got %d", __func__, num_wr,
 	      att.cap.max_recv_wr);
     if ((int) att.cap.max_send_wr < num_wr)
-	error("%s: asked for %d send WRs on QP, got %d", __func__, num_wr,
+	warning("%s: asked for %d send WRs on QP, got %d", __func__, num_wr,
 	      att.cap.max_send_wr);
 
     /* exchange data, converting info to network order and back */
@@ -285,7 +295,7 @@ static void init_connection_modify_qp(st
     attr.port_num = od->nic_port;
     ret = ibv_modify_qp(qp, &attr, mask);
     if (ret)
-	error_xerrno(ret, "%s: ibv_modify_qp -> INIT", __func__);
+	error("%s: ibv_modify_qp -> INIT returned %d", __func__, ret);
 
     /* Transition QP to Ready-to-Receive (RTR) */
     mask =
@@ -307,7 +317,7 @@ static void init_connection_modify_qp(st
     attr.min_rnr_timer = 31;
     ret = ibv_modify_qp(qp, &attr, mask);
     if (ret)
-	error_xerrno(ret, "%s: ibv_modify_qp INIT -> RTR", __func__);
+	error("%s: ibv_modify_qp INIT -> RTR returned %d", __func__, ret);
 
     /* transition qp to ready-to-send */
     mask =
@@ -326,7 +336,7 @@ static void init_connection_modify_qp(st
     attr.rnr_retry = 20;
     ret = ibv_modify_qp(qp, &attr, mask);
     if (ret)
-	error_xerrno(ret, "%s: ibv_modify_qp RTR -> RTS", __func__);
+	error("%s: ibv_modify_qp RTR -> RTS returned %d", __func__, ret);
 
 }
 
@@ -347,7 +357,7 @@ static void openib_drain_qp(ib_connectio
     attr.qp_state = IBV_QPS_SQD;
     ret = ibv_modify_qp(qp, &attr, mask);
     if (ret < 0)
-	error_xerrno(ret, "%s: ibv_modify_qp RTS -> SQD", __func__);
+	error("%s: ibv_modify_qp RTS -> SQD returned %d", __func__, ret);
 }
 
 /*
@@ -364,19 +374,19 @@ static void openib_close_connection(ib_c
     if (oc->qp) {
 	ret = ibv_destroy_qp(oc->qp);
 	if (ret < 0)
-	    error_xerrno(ret, "%s: ibv_destroy_qp", __func__);
+	    error("%s: ibv_destroy_qp returned %d", __func__, ret);
     }
 
     /* destroy the memory regions */
     if (oc->eager_send_mr) {
 	ret = ibv_dereg_mr(oc->eager_send_mr);
 	if (ret < 0)
-	    error_xerrno(ret, "%s: ibv_deregister_mr eager send", __func__);
+	    error("%s: ibv_deregister_mr eager send returned %d", __func__, ret);
     }
     if (oc->eager_recv_mr) {
 	ret = ibv_dereg_mr(oc->eager_recv_mr);
 	if (ret < 0)
-	    error_xerrno(ret, "%s: ibv_deregister_mr eager recv", __func__);
+	    error("%s: ibv_deregister_mr eager recv returned %d", __func__, ret);
     }
 
     free(oc);
@@ -392,6 +402,7 @@ static void openib_post_sr(const struct 
     struct openib_connection_priv *oc = c->priv;
     struct openib_device_priv *od = ib_device->priv;
     int ret;
+    int retry_count = 0;
     struct ibv_sge sg = {
         .addr = int64_from_ptr(bh->buf),
         .length = len,
@@ -407,7 +418,7 @@ static void openib_post_sr(const struct 
     };
     struct ibv_send_wr *bad_wr;
 
-    debug(4, "%s: %s bh %d len %u wr %d/%d", __func__, c->peername, bh->num,
+    debug(1, "%s: %s bh %d len %u wr %d/%d", __func__, c->peername, bh->num,
           len, od->num_unsignaled_sends, od->max_unsignaled_sends);
 
     if (od->num_unsignaled_sends + 10 == od->max_unsignaled_sends)
@@ -415,9 +426,20 @@ static void openib_post_sr(const struct 
     else
         ++od->num_unsignaled_sends;
 
+retry_post:
     ret = ibv_post_send(oc->qp, &sr, &bad_wr);
-    if (ret < 0)
-        error("%s: ibv_post_send (%d)", __func__, ret);
+    /* any nonzero return is a failed post: reap completions and retry */
+    if (ret) {
+        if (ib_block_for_activity(POST_RETRY_BLOCKTIME))
+            ib_check_cq(); /* hopefully pop some work */
+        if (retry_count < POST_MAX_RETRIES) {
+            warning("%s: retry_post block wr_credit %d count/max: %d/%d",
+                    __func__, od->nic_wr_credit, retry_count, POST_MAX_RETRIES);
+            ++retry_count;
+            goto retry_post;
+        }
+        error("%s: ibv_post_send failed ret: %d errno %d", __func__, ret, errno);
+    }
 }
 
 /*
@@ -443,7 +465,7 @@ static void openib_post_rr(const ib_conn
     debug(4, "%s: %s bh %d", __func__, c->peername, bh->num);
     ret = ibv_post_recv(oc->qp, &rr, &bad_wr);
     if (ret)
-        error("%s: ibv_post_recv", __func__);
+        error_errno("%s: ibv_post_recv", __func__);
 }
 
 /*
@@ -505,6 +527,7 @@ static void openib_post_sr_rdmaw(struct 
     while (!done) {
         int ret;
         struct ibv_send_wr *bad_wr;
+	int retry_count = 0;
 
         if (recv_bytes_needed == 0) {
             /* new one, fresh numbers */
@@ -570,14 +593,50 @@ static void openib_post_sr_rdmaw(struct 
         if (done) {
             sr.wr_id = int64_from_ptr(sq);     /* used to match in completion */
             sr.send_flags = IBV_SEND_SIGNALED; /* completion drives the unpin */
+            ++od->nic_wr_credit;  /* one more signaled write outstanding */
         } else {
             sr.wr_id = 0;
             sr.send_flags = 0;
         }
+
+        debug(1, "%s: ibv_post_send wr_id %llx to %s remote addr %llx rkey %x nic_wr_credit %d",
+              __func__, llu(sr.wr_id), c->peername, llu(sr.wr.rdma.remote_addr),
+              sr.wr.rdma.rkey, od->nic_wr_credit);
 
+retry_post:
         ret = ibv_post_send(oc->qp, &sr, &bad_wr);
-        if (ret < 0)
-            error("%s: ibv_post_send (%d)", __func__, ret);
+        if (ret) {  /* nonzero means the post failed */
+            struct ibv_qp_attr attr;
+            struct ibv_qp_init_attr init_attr;
+            /* wait briefly for activity, reap completions, then try again */
+            if (ib_block_for_activity(POST_RETRY_BLOCKTIME))
+                ib_check_cq(); /* hopefully pop some work */
+            if (retry_count < POST_MAX_RETRIES) {
+                warning("%s: retry_post wr_credit %d count/max: %d/%d",
+                        __func__, od->nic_wr_credit, retry_count, POST_MAX_RETRIES);
+                ++retry_count;
+                goto retry_post;
+            }
+
+            /* out of retries: dump the work request and QP limits, then fail */
+            gossip_err("%s: ibv_post_send failed ret: %d errno: %d\n",
+                       __func__, ret, errno);
+            gossip_err(" wr_id: 0x%llx next: %p sg_list %p num_sge %d\n",
+                       llu(sr.wr_id), sr.next, sr.sg_list, sr.num_sge);
+            gossip_err(" opcode: 0x%x send_flags: 0x%x imm_data: 0x%x\n",
+                       sr.opcode, sr.send_flags, sr.imm_data);
+            gossip_err(" sr.wr.rdma.remote_addr: 0x%llx rkey 0x%x\n",
+                       llu(sr.wr.rdma.remote_addr), sr.wr.rdma.rkey);
+            gossip_err(" od->nic_wr_credit %d od->nic_max_wr %d\n",
+                       od->nic_wr_credit, od->nic_max_wr);
+            gossip_err("%s: QP_request sge: %d\n", __func__, sr.num_sge);
+            /* attr is filled in only when the query itself succeeds */
+            if (ibv_query_qp(oc->qp, &attr, IBV_QP_STATE | IBV_QP_CUR_STATE |
+                             IBV_QP_TIMEOUT | IBV_QP_CAP, &init_attr))
+                error("%s: ibv_post_send (%d)", __func__, ret);
+            else
+                error_xerrno(ret, "%s: QP_sge: %d\n", __func__, attr.cap.max_send_sge);
+        }
     }
 
 #if MEMCACHE_BOUNCEBUF
@@ -606,9 +665,12 @@ static int openib_check_cq(struct bmi_ib
 	wc->opcode = BMI_IB_OP_SEND;
     else if (desc.opcode == (IBV_WC_SEND | IBV_WC_RECV))
 	wc->opcode = BMI_IB_OP_RECV;
-    else if (desc.opcode == IBV_WC_RDMA_WRITE)
+    else if (desc.opcode == IBV_WC_RDMA_WRITE){
 	wc->opcode = BMI_IB_OP_RDMA_WRITE;
-    else {
+	--od->nic_wr_credit;
+	debug(1, "%s: completed rdma wr_id %llx, nic_wr_credit %d",
+		__func__, llu(desc.wr_id), od->nic_wr_credit);
+    } else {
 	debug(0, "%s: unknown opcode, id %llx status %d opcode %d",
 	      __func__, llu(desc.wr_id), desc.status, desc.opcode);
 	debug(0, "%s: vendor_err %d byte_len %d imm_data %d qp_num %d",
@@ -633,7 +695,7 @@ static void openib_prepare_cq_block(int 
     /* ask for the next notfication */
     ret = ibv_req_notify_cq(od->nic_cq, 0);
     if (ret < 0)
-	error_xerrno(ret, "%s: ibv_req_notify_cq", __func__);
+	error("%s: ibv_req_notify_cq returned %d", __func__, ret);
 
     /* return the fd that can be fed to poll() */
     *cq_fd = od->channel->fd;
@@ -791,7 +853,7 @@ static void openib_mem_deregister(memcac
     mrh = ptr_from_int64(c->memkeys.mrh);  /* convert 64-bit int to pointer */
     ret = ibv_dereg_mr(mrh);
     if (ret)
-	error_xerrno(ret, "%s: ibv_dereg_mr", __func__);
+	error("%s: ibv_dereg_mr returned %d", __func__, ret);
     debug(4, "%s: buf %p len %lld lkey %x rkey %x", __func__,
       c->buf, lld(c->len), c->memkeys.lkey, c->memkeys.rkey);
 }
@@ -904,7 +966,7 @@ int openib_ib_initialize(void)
     /* get the lid and verify port state */
     ret = ibv_query_port(od->ctx, od->nic_port, &hca_port);
     if (ret)
-	error_xerrno(ret, "%s: ibv_query_port", __func__);
+	error("%s: ibv_query_port returned %d", __func__, ret);
     VALGRIND_MAKE_MEM_DEFINED(&hca_port, sizeof(hca_port));
 
     od->nic_lid = hca_port.lid;
@@ -916,7 +978,7 @@ int openib_ib_initialize(void)
     /* Query the device for the max_ requests and such */
     ret = ibv_query_device(od->ctx, &hca_cap);
     if (ret)
-	error_xerrno(ret, "%s: ibv_query_device", __func__);
+       error_xerrno(ret, "%s: ibv_query_device", __func__);
     VALGRIND_MAKE_MEM_DEFINED(&hca_cap, sizeof(hca_cap));
 
     debug(1, "%s: max %d completion queue entries", __func__, hca_cap.max_cq);
@@ -979,16 +1041,16 @@ static void openib_ib_finalize(void)
 	free(od->sg_tmp_array);
     ret = ibv_destroy_cq(od->nic_cq);
     if (ret)
-	error_xerrno(ret, "%s: ibv_destroy_cq", __func__);
+	error("%s: ibv_destroy_cq returned %d", __func__, ret);
     ret = ibv_destroy_comp_channel(od->channel);
     if (ret)
-	error_xerrno(ret, "%s: ibv_destroy_comp_channel", __func__);
+	error("%s: ibv_destroy_comp_channel returned %d", __func__, ret);
     ret = ibv_dealloc_pd(od->nic_pd);
     if (ret)
-	error_xerrno(ret, "%s: ibv_dealloc_pd", __func__);
+	error("%s: ibv_dealloc_pd returned %d", __func__, ret);
     ret = ibv_close_device(od->ctx);
     if (ret)
-	error_xerrno(ret, "%s: ibv_close_device", __func__);
+	error("%s: ibv_close_device returned %d", __func__, ret);
 
     free(od);
     ib_device->priv = NULL;

