[dpdk-dev,4/7] net/cxgbe: implement flow create operation

Message ID e5559ea7338e5051c1ca2b3d57b68aed58eab1d5.1528469677.git.rahul.lakkireddy@chelsio.com (mailing list archive)
State Accepted, archived
Delegated to: Ferruh Yigit
Series cxgbe: add support to offload flows via rte_flow

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Rahul Lakkireddy June 8, 2018, 5:58 p.m. UTC
  From: Shagun Agrawal <shaguna@chelsio.com>

Define the filter work request API used to construct filter operations
communicated to firmware. These requests are sent via the control
queue, and completions arrive asynchronously on the firmware event
queue.
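
As a sketch of this request/completion handshake, here is how a caller
drives the helpers added in this patch (this mirrors
__cxgbe_flow_create() in the diff below; dev, fidx, fs and adap are
assumed to be in scope as they are there):

/*
 * Minimal sketch of the asynchronous filter-write handshake, using
 * the helpers added in this patch.  Assumes dev, fidx, fs and adap
 * are in scope as in __cxgbe_flow_create() below.
 */
struct filter_ctx ctx;
int err;

t4_init_completion(&ctx.completion);

/* Build a fw_filter_wr and ship it on the port's control queue. */
err = cxgbe_set_filter(dev, fidx, fs, &ctx);
if (err)
	return err;

/*
 * The CPL_SET_TCB_RPL reply lands on the firmware event queue, where
 * filter_rpl() marks the completion done; poll until then or timeout.
 */
err = cxgbe_poll_for_completion(&adap->sge.fw_evtq, CXGBE_FLOW_POLL_US,
				CXGBE_FLOW_POLL_CNT, &ctx.completion);
if (err)
	return err;	/* -ETIMEDOUT if no reply arrived in time */

/* ctx.result carries the hardware status extracted from the reply. */
return ctx.result;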

Implement the flow create operation to create filters in the LE-TCAM
(maskfull) region at the specified index.
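
With the .create callback wired into cxgbe_flow_ops, rules are driven
through the generic rte_flow API. An illustrative (not
driver-specific) sketch, assuming port_id is a started cxgbe port and
that the items used here are among those handled by the parsers added
earlier in this series:

#include <stdio.h>
#include <rte_byteorder.h>
#include <rte_flow.h>

/*
 * Drop ingress TCP traffic to port 80 on the given port.
 * Illustrative only: item/action coverage depends on the parsers
 * added earlier in this series.
 */
static struct rte_flow *drop_tcp80(uint16_t port_id)
{
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item_tcp spec = { .hdr.dst_port = RTE_BE16(80) };
	struct rte_flow_item_tcp mask = { .hdr.dst_port = RTE_BE16(0xffff) };
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{ .type = RTE_FLOW_ITEM_TYPE_TCP,
		  .spec = &spec, .mask = &mask },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_DROP },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error err;
	struct rte_flow *flow;

	/* The PMD parses the rule, writes an LE-TCAM filter, and waits
	 * for the firmware completion before returning.
	 */
	flow = rte_flow_create(port_id, &attr, pattern, actions, &err);
	if (!flow)
		printf("flow create failed: %s\n",
		       err.message ? err.message : "(unknown)");
	return flow;
}

The equivalent rule should also be reachable from testpmd's flow
syntax, e.g. "flow create 0 ingress pattern eth / ipv4 / tcp dst is 80
/ end actions drop / end".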

Signed-off-by: Shagun Agrawal <shaguna@chelsio.com>
Signed-off-by: Kumar Sanghvi <kumaras@chelsio.com>
Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
---
 drivers/net/cxgbe/base/adapter.h        |  21 ++
 drivers/net/cxgbe/base/t4_msg.h         |  22 ++
 drivers/net/cxgbe/base/t4fw_interface.h | 145 +++++++++++++
 drivers/net/cxgbe/cxgbe.h               |   2 +
 drivers/net/cxgbe/cxgbe_filter.c        | 356 ++++++++++++++++++++++++++++++++
 drivers/net/cxgbe/cxgbe_filter.h        |  32 +++
 drivers/net/cxgbe/cxgbe_flow.c          |  82 +++++++-
 drivers/net/cxgbe/cxgbe_flow.h          |   4 +
 drivers/net/cxgbe/cxgbe_main.c          |  36 ++++
 9 files changed, 699 insertions(+), 1 deletion(-)
  

Patch

diff --git a/drivers/net/cxgbe/base/adapter.h b/drivers/net/cxgbe/base/adapter.h
index 9a66a4a99..7f9ddae01 100644
--- a/drivers/net/cxgbe/base/adapter.h
+++ b/drivers/net/cxgbe/base/adapter.h
@@ -717,6 +717,27 @@  static inline void t4_os_atomic_list_del(struct mbox_entry *entry,
 	t4_os_unlock(lock);
 }
 
+/**
+ * t4_init_completion - initialize completion
+ * @c: the completion context
+ */
+static inline void t4_init_completion(struct t4_completion *c)
+{
+	c->done = 0;
+	t4_os_lock_init(&c->lock);
+}
+
+/**
+ * t4_complete - set completion as done
+ * @c: the completion context
+ */
+static inline void t4_complete(struct t4_completion *c)
+{
+	t4_os_lock(&c->lock);
+	c->done = 1;
+	t4_os_unlock(&c->lock);
+}
+
 void *t4_alloc_mem(size_t size);
 void t4_free_mem(void *addr);
 #define t4_os_alloc(_size)     t4_alloc_mem((_size))
diff --git a/drivers/net/cxgbe/base/t4_msg.h b/drivers/net/cxgbe/base/t4_msg.h
index 74b4fc193..43d1cb66f 100644
--- a/drivers/net/cxgbe/base/t4_msg.h
+++ b/drivers/net/cxgbe/base/t4_msg.h
@@ -7,6 +7,7 @@ 
 #define T4_MSG_H
 
 enum {
+	CPL_SET_TCB_RPL       = 0x3A,
 	CPL_SGE_EGR_UPDATE    = 0xA5,
 	CPL_FW4_MSG           = 0xC0,
 	CPL_FW6_MSG           = 0xE0,
@@ -25,6 +26,13 @@  union opcode_tid {
 	__u8 opcode;
 };
 
+#define G_TID(x)    ((x) & 0xFFFFFF)
+
+#define OPCODE_TID(cmd) ((cmd)->ot.opcode_tid)
+
+/* extract the TID from a CPL command */
+#define GET_TID(cmd) (G_TID(be32_to_cpu(OPCODE_TID(cmd))))
+
 struct rss_header {
 	__u8 opcode;
 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
@@ -66,6 +74,20 @@  struct work_request_hdr {
 #define WR_HDR_SIZE 0
 #endif
 
+#define S_COOKIE    5
+#define M_COOKIE    0x7
+#define V_COOKIE(x) ((x) << S_COOKIE)
+#define G_COOKIE(x) (((x) >> S_COOKIE) & M_COOKIE)
+
+struct cpl_set_tcb_rpl {
+	RSS_HDR
+	union opcode_tid ot;
+	__be16 rsvd;
+	__u8   cookie;
+	__u8   status;
+	__be64 oldval;
+};
+
 struct cpl_tx_data {
 	union opcode_tid ot;
 	__be32 len;
diff --git a/drivers/net/cxgbe/base/t4fw_interface.h b/drivers/net/cxgbe/base/t4fw_interface.h
index 44b6f6dac..842aa1263 100644
--- a/drivers/net/cxgbe/base/t4fw_interface.h
+++ b/drivers/net/cxgbe/base/t4fw_interface.h
@@ -54,6 +54,7 @@  enum fw_memtype {
  ********************************/
 
 enum fw_wr_opcodes {
+	FW_FILTER_WR		= 0x02,
 	FW_ETH_TX_PKT_WR	= 0x08,
 	FW_ETH_TX_PKTS_WR	= 0x09,
 	FW_ETH_TX_PKT_VM_WR	= 0x11,
@@ -143,6 +144,150 @@  struct fw_eth_tx_pkts_vm_wr {
 	__be16 vlantci;
 };
 
+/* filter wr reply code in cookie in CPL_SET_TCB_RPL */
+enum fw_filter_wr_cookie {
+	FW_FILTER_WR_SUCCESS,
+	FW_FILTER_WR_FLT_ADDED,
+	FW_FILTER_WR_FLT_DELETED,
+	FW_FILTER_WR_SMT_TBL_FULL,
+	FW_FILTER_WR_EINVAL,
+};
+
+struct fw_filter_wr {
+	__be32 op_pkd;
+	__be32 len16_pkd;
+	__be64 r3;
+	__be32 tid_to_iq;
+	__be32 del_filter_to_l2tix;
+	__be16 ethtype;
+	__be16 ethtypem;
+	__u8   frag_to_ovlan_vldm;
+	__u8   smac_sel;
+	__be16 rx_chan_rx_rpl_iq;
+	__be32 maci_to_matchtypem;
+	__u8   ptcl;
+	__u8   ptclm;
+	__u8   ttyp;
+	__u8   ttypm;
+	__be16 ivlan;
+	__be16 ivlanm;
+	__be16 ovlan;
+	__be16 ovlanm;
+	__u8   lip[16];
+	__u8   lipm[16];
+	__u8   fip[16];
+	__u8   fipm[16];
+	__be16 lp;
+	__be16 lpm;
+	__be16 fp;
+	__be16 fpm;
+	__be16 r7;
+	__u8   sma[6];
+};
+
+#define S_FW_FILTER_WR_TID	12
+#define V_FW_FILTER_WR_TID(x)	((x) << S_FW_FILTER_WR_TID)
+
+#define S_FW_FILTER_WR_RQTYPE		11
+#define V_FW_FILTER_WR_RQTYPE(x)	((x) << S_FW_FILTER_WR_RQTYPE)
+
+#define S_FW_FILTER_WR_NOREPLY		10
+#define V_FW_FILTER_WR_NOREPLY(x)	((x) << S_FW_FILTER_WR_NOREPLY)
+
+#define S_FW_FILTER_WR_IQ	0
+#define V_FW_FILTER_WR_IQ(x)	((x) << S_FW_FILTER_WR_IQ)
+
+#define S_FW_FILTER_WR_DEL_FILTER	31
+#define V_FW_FILTER_WR_DEL_FILTER(x)	((x) << S_FW_FILTER_WR_DEL_FILTER)
+#define F_FW_FILTER_WR_DEL_FILTER	V_FW_FILTER_WR_DEL_FILTER(1U)
+
+#define S_FW_FILTER_WR_RPTTID		25
+#define V_FW_FILTER_WR_RPTTID(x)	((x) << S_FW_FILTER_WR_RPTTID)
+
+#define S_FW_FILTER_WR_DROP	24
+#define V_FW_FILTER_WR_DROP(x)	((x) << S_FW_FILTER_WR_DROP)
+
+#define S_FW_FILTER_WR_DIRSTEER		23
+#define V_FW_FILTER_WR_DIRSTEER(x)	((x) << S_FW_FILTER_WR_DIRSTEER)
+
+#define S_FW_FILTER_WR_MASKHASH		22
+#define V_FW_FILTER_WR_MASKHASH(x)	((x) << S_FW_FILTER_WR_MASKHASH)
+
+#define S_FW_FILTER_WR_DIRSTEERHASH	21
+#define V_FW_FILTER_WR_DIRSTEERHASH(x)	((x) << S_FW_FILTER_WR_DIRSTEERHASH)
+
+#define S_FW_FILTER_WR_LPBK	20
+#define V_FW_FILTER_WR_LPBK(x)	((x) << S_FW_FILTER_WR_LPBK)
+
+#define S_FW_FILTER_WR_DMAC	19
+#define V_FW_FILTER_WR_DMAC(x)	((x) << S_FW_FILTER_WR_DMAC)
+
+#define S_FW_FILTER_WR_INSVLAN		17
+#define V_FW_FILTER_WR_INSVLAN(x)	((x) << S_FW_FILTER_WR_INSVLAN)
+
+#define S_FW_FILTER_WR_RMVLAN		16
+#define V_FW_FILTER_WR_RMVLAN(x)	((x) << S_FW_FILTER_WR_RMVLAN)
+
+#define S_FW_FILTER_WR_HITCNTS		15
+#define V_FW_FILTER_WR_HITCNTS(x)	((x) << S_FW_FILTER_WR_HITCNTS)
+
+#define S_FW_FILTER_WR_TXCHAN		13
+#define V_FW_FILTER_WR_TXCHAN(x)	((x) << S_FW_FILTER_WR_TXCHAN)
+
+#define S_FW_FILTER_WR_PRIO	12
+#define V_FW_FILTER_WR_PRIO(x)	((x) << S_FW_FILTER_WR_PRIO)
+
+#define S_FW_FILTER_WR_L2TIX	0
+#define V_FW_FILTER_WR_L2TIX(x)	((x) << S_FW_FILTER_WR_L2TIX)
+
+#define S_FW_FILTER_WR_FRAG	7
+#define V_FW_FILTER_WR_FRAG(x)	((x) << S_FW_FILTER_WR_FRAG)
+
+#define S_FW_FILTER_WR_FRAGM	6
+#define V_FW_FILTER_WR_FRAGM(x)	((x) << S_FW_FILTER_WR_FRAGM)
+
+#define S_FW_FILTER_WR_IVLAN_VLD	5
+#define V_FW_FILTER_WR_IVLAN_VLD(x)	((x) << S_FW_FILTER_WR_IVLAN_VLD)
+
+#define S_FW_FILTER_WR_OVLAN_VLD	4
+#define V_FW_FILTER_WR_OVLAN_VLD(x)	((x) << S_FW_FILTER_WR_OVLAN_VLD)
+
+#define S_FW_FILTER_WR_IVLAN_VLDM	3
+#define V_FW_FILTER_WR_IVLAN_VLDM(x)	((x) << S_FW_FILTER_WR_IVLAN_VLDM)
+
+#define S_FW_FILTER_WR_OVLAN_VLDM	2
+#define V_FW_FILTER_WR_OVLAN_VLDM(x)	((x) << S_FW_FILTER_WR_OVLAN_VLDM)
+
+#define S_FW_FILTER_WR_RX_CHAN		15
+#define V_FW_FILTER_WR_RX_CHAN(x)	((x) << S_FW_FILTER_WR_RX_CHAN)
+
+#define S_FW_FILTER_WR_RX_RPL_IQ	0
+#define V_FW_FILTER_WR_RX_RPL_IQ(x)	((x) << S_FW_FILTER_WR_RX_RPL_IQ)
+
+#define S_FW_FILTER_WR_MACI	23
+#define V_FW_FILTER_WR_MACI(x)	((x) << S_FW_FILTER_WR_MACI)
+
+#define S_FW_FILTER_WR_MACIM	14
+#define V_FW_FILTER_WR_MACIM(x)	((x) << S_FW_FILTER_WR_MACIM)
+
+#define S_FW_FILTER_WR_FCOE	13
+#define V_FW_FILTER_WR_FCOE(x)	((x) << S_FW_FILTER_WR_FCOE)
+
+#define S_FW_FILTER_WR_FCOEM	12
+#define V_FW_FILTER_WR_FCOEM(x)	((x) << S_FW_FILTER_WR_FCOEM)
+
+#define S_FW_FILTER_WR_PORT	9
+#define V_FW_FILTER_WR_PORT(x)	((x) << S_FW_FILTER_WR_PORT)
+
+#define S_FW_FILTER_WR_PORTM	6
+#define V_FW_FILTER_WR_PORTM(x)	((x) << S_FW_FILTER_WR_PORTM)
+
+#define S_FW_FILTER_WR_MATCHTYPE	3
+#define V_FW_FILTER_WR_MATCHTYPE(x)	((x) << S_FW_FILTER_WR_MATCHTYPE)
+
+#define S_FW_FILTER_WR_MATCHTYPEM	0
+#define V_FW_FILTER_WR_MATCHTYPEM(x)	((x) << S_FW_FILTER_WR_MATCHTYPEM)
+
 /******************************************************************************
  *  C O M M A N D s
  *********************/
diff --git a/drivers/net/cxgbe/cxgbe.h b/drivers/net/cxgbe/cxgbe.h
index 44f5934d1..27d6e2b84 100644
--- a/drivers/net/cxgbe/cxgbe.h
+++ b/drivers/net/cxgbe/cxgbe.h
@@ -38,6 +38,8 @@  void cxgbe_close(struct adapter *adapter);
 void cxgbe_stats_get(struct port_info *pi, struct port_stats *stats);
 void cxgbevf_stats_get(struct port_info *pi, struct port_stats *stats);
 void cxgbe_stats_reset(struct port_info *pi);
+int cxgbe_poll_for_completion(struct sge_rspq *q, unsigned int us,
+			      unsigned int cnt, struct t4_completion *c);
 int link_start(struct port_info *pi);
 void init_rspq(struct adapter *adap, struct sge_rspq *q, unsigned int us,
 	       unsigned int cnt, unsigned int size, unsigned int iqe_size);
diff --git a/drivers/net/cxgbe/cxgbe_filter.c b/drivers/net/cxgbe/cxgbe_filter.c
index 6b10a8be1..cf83ec9c0 100644
--- a/drivers/net/cxgbe/cxgbe_filter.c
+++ b/drivers/net/cxgbe/cxgbe_filter.c
@@ -33,6 +33,50 @@  int validate_filter(struct adapter *adapter, struct ch_filter_specification *fs)
 	return 0;
 }
 
+/**
+ * Get the queue to which the traffic must be steered.
+ */
+static unsigned int get_filter_steerq(struct rte_eth_dev *dev,
+				      struct ch_filter_specification *fs)
+{
+	struct port_info *pi = ethdev2pinfo(dev);
+	struct adapter *adapter = pi->adapter;
+	unsigned int iq;
+
+	/*
+	 * If the user has requested steering matching Ingress Packets
+	 * to a specific Queue Set, we need to make sure it's in range
+	 * for the port and map that into the Absolute Queue ID of the
+	 * Queue Set's Response Queue.
+	 */
+	if (!fs->dirsteer) {
+		iq = 0;
+	} else {
+		/*
+		 * If the iq id is greater than the number of qsets,
+		 * then assume it is an absolute qid.
+		 */
+		if (fs->iq < pi->n_rx_qsets)
+			iq = adapter->sge.ethrxq[pi->first_qset +
+						 fs->iq].rspq.abs_id;
+		else
+			iq = fs->iq;
+	}
+
+	return iq;
+}
+
+/* Return an error number if the indicated filter isn't writable ... */
+int writable_filter(struct filter_entry *f)
+{
+	if (f->locked)
+		return -EPERM;
+	if (f->pending)
+		return -EBUSY;
+
+	return 0;
+}
+
 /**
  * Check if entry already filled.
  */
@@ -75,3 +119,315 @@  int cxgbe_alloc_ftid(struct adapter *adap, unsigned int family)
 
 	return pos < size ? pos : -1;
 }
+
+/**
+ * Clear a filter and release any of its resources that we own.  This also
+ * clears the filter's "pending" status.
+ */
+void clear_filter(struct filter_entry *f)
+{
+	/*
+	 * The zeroing of the filter rule below clears the filter valid,
+	 * pending, locked flags etc. so it's all we need for
+	 * this operation.
+	 */
+	memset(f, 0, sizeof(*f));
+}
+
+int set_filter_wr(struct rte_eth_dev *dev, unsigned int fidx)
+{
+	struct adapter *adapter = ethdev2adap(dev);
+	struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
+	struct rte_mbuf *mbuf;
+	struct fw_filter_wr *fwr;
+	struct sge_ctrl_txq *ctrlq;
+	unsigned int port_id = ethdev2pinfo(dev)->port_id;
+	int ret;
+
+	ctrlq = &adapter->sge.ctrlq[port_id];
+	mbuf = rte_pktmbuf_alloc(ctrlq->mb_pool);
+	if (!mbuf) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	mbuf->data_len = sizeof(*fwr);
+	mbuf->pkt_len = mbuf->data_len;
+
+	fwr = rte_pktmbuf_mtod(mbuf, struct fw_filter_wr *);
+	memset(fwr, 0, sizeof(*fwr));
+
+	/*
+	 * Construct the work request to set the filter.
+	 */
+	fwr->op_pkd = cpu_to_be32(V_FW_WR_OP(FW_FILTER_WR));
+	fwr->len16_pkd = cpu_to_be32(V_FW_WR_LEN16(sizeof(*fwr) / 16));
+	fwr->tid_to_iq =
+		cpu_to_be32(V_FW_FILTER_WR_TID(f->tid) |
+			    V_FW_FILTER_WR_RQTYPE(f->fs.type) |
+			    V_FW_FILTER_WR_NOREPLY(0) |
+			    V_FW_FILTER_WR_IQ(f->fs.iq));
+	fwr->del_filter_to_l2tix =
+		cpu_to_be32(V_FW_FILTER_WR_DROP(f->fs.action == FILTER_DROP) |
+			    V_FW_FILTER_WR_DIRSTEER(f->fs.dirsteer) |
+			    V_FW_FILTER_WR_HITCNTS(f->fs.hitcnts) |
+			    V_FW_FILTER_WR_PRIO(f->fs.prio));
+	fwr->ethtype = cpu_to_be16(f->fs.val.ethtype);
+	fwr->ethtypem = cpu_to_be16(f->fs.mask.ethtype);
+	fwr->smac_sel = 0;
+	fwr->rx_chan_rx_rpl_iq =
+		cpu_to_be16(V_FW_FILTER_WR_RX_CHAN(0) |
+			    V_FW_FILTER_WR_RX_RPL_IQ(adapter->sge.fw_evtq.abs_id
+						     ));
+	fwr->ptcl = f->fs.val.proto;
+	fwr->ptclm = f->fs.mask.proto;
+	rte_memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip));
+	rte_memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm));
+	rte_memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip));
+	rte_memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm));
+	fwr->lp = cpu_to_be16(f->fs.val.lport);
+	fwr->lpm = cpu_to_be16(f->fs.mask.lport);
+	fwr->fp = cpu_to_be16(f->fs.val.fport);
+	fwr->fpm = cpu_to_be16(f->fs.mask.fport);
+
+	/*
+	 * Mark the filter as "pending" and ship off the Filter Work Request.
+	 * When we get the Work Request Reply we'll clear the pending status.
+	 */
+	f->pending = 1;
+	t4_mgmt_tx(ctrlq, mbuf);
+	return 0;
+
+out:
+	return ret;
+}
+
+/**
+ * Set the corresponding entry in the bitmap. 4 slots are
+ * marked for IPv6, whereas only 1 slot is marked for IPv4.
+ */
+static int cxgbe_set_ftid(struct tid_info *t, int fidx, int family)
+{
+	t4_os_lock(&t->ftid_lock);
+	if (rte_bitmap_get(t->ftid_bmap, fidx)) {
+		t4_os_unlock(&t->ftid_lock);
+		return -EBUSY;
+	}
+
+	if (family == FILTER_TYPE_IPV4) {
+		rte_bitmap_set(t->ftid_bmap, fidx);
+	} else {
+		rte_bitmap_set(t->ftid_bmap, fidx);
+		rte_bitmap_set(t->ftid_bmap, fidx + 1);
+		rte_bitmap_set(t->ftid_bmap, fidx + 2);
+		rte_bitmap_set(t->ftid_bmap, fidx + 3);
+	}
+	t4_os_unlock(&t->ftid_lock);
+	return 0;
+}
+
+/**
+ * Clear the corresponding entry in the bitmap. 4 slots are
+ * cleared for IPv6, whereas only 1 slot is cleared for IPv4.
+ */
+static void cxgbe_clear_ftid(struct tid_info *t, int fidx, int family)
+{
+	t4_os_lock(&t->ftid_lock);
+	if (family == FILTER_TYPE_IPV4) {
+		rte_bitmap_clear(t->ftid_bmap, fidx);
+	} else {
+		rte_bitmap_clear(t->ftid_bmap, fidx);
+		rte_bitmap_clear(t->ftid_bmap, fidx + 1);
+		rte_bitmap_clear(t->ftid_bmap, fidx + 2);
+		rte_bitmap_clear(t->ftid_bmap, fidx + 3);
+	}
+	t4_os_unlock(&t->ftid_lock);
+}
+
+/**
+ * Check a Chelsio Filter Request for validity, convert it into our internal
+ * format and send it to the hardware.  Return 0 on success, an error number
+ * otherwise.  We attach any provided filter operation context to the internal
+ * filter specification in order to facilitate signaling completion of the
+ * operation.
+ */
+int cxgbe_set_filter(struct rte_eth_dev *dev, unsigned int filter_id,
+		     struct ch_filter_specification *fs,
+		     struct filter_ctx *ctx)
+{
+	struct port_info *pi = ethdev2pinfo(dev);
+	struct adapter *adapter = pi->adapter;
+	unsigned int fidx, iq, fid_bit = 0;
+	struct filter_entry *f;
+	int ret;
+
+	if (filter_id >= adapter->tids.nftids)
+		return -ERANGE;
+
+	ret = validate_filter(adapter, fs);
+	if (ret)
+		return ret;
+
+	/*
+	 * Ensure filter id is aligned on the 4 slot boundary for IPv6
+	 * maskfull filters.
+	 */
+	if (fs->type)
+		filter_id &= ~(0x3);
+
+	ret = is_filter_set(&adapter->tids, filter_id, fs->type);
+	if (ret)
+		return -EBUSY;
+
+	iq = get_filter_steerq(dev, fs);
+
+	/*
+	 * IPv6 filters occupy four slots and must be aligned on
+	 * four-slot boundaries.  IPv4 filters only occupy a single
+	 * slot and have no alignment requirements but writing a new
+	 * IPv4 filter into the middle of an existing IPv6 filter
+	 * requires clearing the old IPv6 filter.
+	 */
+	if (fs->type == FILTER_TYPE_IPV4) { /* IPv4 */
+		/*
+		 * If our IPv4 filter isn't being written to a
+		 * multiple of four filter index and there's an IPv6
+		 * filter at the multiple of 4 base slot, then we need
+		 * to delete that IPv6 filter ...
+		 */
+		fidx = filter_id & ~0x3;
+		if (fidx != filter_id && adapter->tids.ftid_tab[fidx].fs.type) {
+			f = &adapter->tids.ftid_tab[fidx];
+			if (f->valid)
+				return -EBUSY;
+		}
+	} else { /* IPv6 */
+		/*
+		 * Ensure that the IPv6 filter is aligned on a
+		 * multiple of 4 boundary.
+		 */
+		if (filter_id & 0x3)
+			return -EINVAL;
+
+		/*
+		 * Check all except the base overlapping IPv4 filter
+		 * slots.
+		 */
+		for (fidx = filter_id + 1; fidx < filter_id + 4; fidx++) {
+			f = &adapter->tids.ftid_tab[fidx];
+			if (f->valid)
+				return -EBUSY;
+		}
+	}
+
+	/*
+	 * Check to make sure that provided filter index is not
+	 * already in use by someone else
+	 */
+	f = &adapter->tids.ftid_tab[filter_id];
+	if (f->valid)
+		return -EBUSY;
+
+	fidx = adapter->tids.ftid_base + filter_id;
+	fid_bit = filter_id;
+	ret = cxgbe_set_ftid(&adapter->tids, fid_bit,
+			     fs->type ? FILTER_TYPE_IPV6 : FILTER_TYPE_IPV4);
+	if (ret)
+		return ret;
+
+	/*
+	 * Check to make sure the filter requested is writable ...
+	 */
+	ret = writable_filter(f);
+	if (ret) {
+		/* Clear the bits we have set above */
+		cxgbe_clear_ftid(&adapter->tids, fid_bit,
+				 fs->type ? FILTER_TYPE_IPV6 :
+					    FILTER_TYPE_IPV4);
+		return ret;
+	}
+
+	/*
+	 * Convert the filter specification into our internal format.
+	 * We copy the PF/VF specification into the Outer VLAN field
+	 * here so the rest of the code -- including the interface to
+	 * the firmware -- doesn't have to constantly do these checks.
+	 */
+	f->fs = *fs;
+	f->fs.iq = iq;
+	f->dev = dev;
+
+	/*
+	 * Attempt to set the filter.  If we don't succeed, we clear
+	 * it and return the failure.
+	 */
+	f->ctx = ctx;
+	f->tid = fidx; /* Save the actual tid */
+	ret = set_filter_wr(dev, filter_id);
+	if (ret) {
+		fid_bit = f->tid - adapter->tids.ftid_base;
+		cxgbe_clear_ftid(&adapter->tids, fid_bit,
+				 fs->type ? FILTER_TYPE_IPV6 :
+					    FILTER_TYPE_IPV4);
+		clear_filter(f);
+	}
+
+	return ret;
+}
+
+/**
+ * Handle a LE-TCAM filter write/deletion reply.
+ */
+void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl)
+{
+	struct filter_entry *f = NULL;
+	unsigned int tid = GET_TID(rpl);
+	int idx, max_fidx = adap->tids.nftids;
+
+	/* Get the corresponding filter entry for this tid */
+	if (adap->tids.ftid_tab) {
+		/* Check this in normal filter region */
+		idx = tid - adap->tids.ftid_base;
+		if (idx >= max_fidx)
+			return;
+
+		f = &adap->tids.ftid_tab[idx];
+		if (f->tid != tid)
+			return;
+	}
+
+	/* We found the filter entry for this tid */
+	if (f) {
+		unsigned int ret = G_COOKIE(rpl->cookie);
+		struct filter_ctx *ctx;
+
+		/*
+		 * Pull off any filter operation context attached to the
+		 * filter.
+		 */
+		ctx = f->ctx;
+		f->ctx = NULL;
+
+		if (ret == FW_FILTER_WR_FLT_ADDED) {
+			f->pending = 0;  /* asynchronous setup completed */
+			f->valid = 1;
+			if (ctx) {
+				ctx->tid = f->tid;
+				ctx->result = 0;
+			}
+		} else {
+			/*
+			 * Something went wrong.  Issue a warning about the
+			 * problem and clear everything out.
+			 */
+			dev_warn(adap, "filter %u setup failed with error %u\n",
+				 idx, ret);
+			clear_filter(f);
+			if (ctx)
+				ctx->result = -EINVAL;
+		}
+
+		if (ctx)
+			t4_complete(&ctx->completion);
+	}
+}
diff --git a/drivers/net/cxgbe/cxgbe_filter.h b/drivers/net/cxgbe/cxgbe_filter.h
index a9d2d3d39..e12baa7f9 100644
--- a/drivers/net/cxgbe/cxgbe_filter.h
+++ b/drivers/net/cxgbe/cxgbe_filter.h
@@ -112,14 +112,39 @@  enum filter_type {
 	FILTER_TYPE_IPV6,
 };
 
+struct t4_completion {
+	unsigned int done;       /* completion done (0 - No, 1 - Yes) */
+	rte_spinlock_t lock;     /* completion lock */
+};
+
+/*
+ * Filter operation context to allow callers to wait for
+ * an asynchronous completion.
+ */
+struct filter_ctx {
+	struct t4_completion completion; /* completion rendezvous */
+	int result;                      /* result of operation */
+	u32 tid;                         /* to store tid of hash filter */
+};
+
 /*
  * Host shadow copy of ingress filter entry.  This is in host native format
  * and doesn't match the ordering or bit order, etc. of the hardware or the
  * firmware command.
  */
 struct filter_entry {
+	/*
+	 * Administrative fields for filter.
+	 */
+	u32 valid:1;                /* filter allocated and valid */
+	u32 locked:1;               /* filter is administratively locked */
+	u32 pending:1;              /* filter action is pending FW reply */
+	struct filter_ctx *ctx;     /* caller's completion hook */
 	struct rte_eth_dev *dev;    /* Port's rte eth device */
 
+	/* This will store the actual tid */
+	u32 tid;
+
 	/*
 	 * The filter itself.
 	 */
@@ -183,6 +208,13 @@  cxgbe_bitmap_find_free_region(struct rte_bitmap *bmap, unsigned int size,
 }
 
 bool is_filter_set(struct tid_info *, int fidx, int family);
+void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl);
+void clear_filter(struct filter_entry *f);
+int set_filter_wr(struct rte_eth_dev *dev, unsigned int fidx);
+int writable_filter(struct filter_entry *f);
+int cxgbe_set_filter(struct rte_eth_dev *dev, unsigned int filter_id,
+		     struct ch_filter_specification *fs,
+		     struct filter_ctx *ctx);
 int cxgbe_alloc_ftid(struct adapter *adap, unsigned int family);
 int validate_filter(struct adapter *adap, struct ch_filter_specification *fs);
 #endif /* _CXGBE_FILTER_H_ */
diff --git a/drivers/net/cxgbe/cxgbe_flow.c b/drivers/net/cxgbe/cxgbe_flow.c
index a01708e70..7fa3f5810 100644
--- a/drivers/net/cxgbe/cxgbe_flow.c
+++ b/drivers/net/cxgbe/cxgbe_flow.c
@@ -391,6 +391,86 @@  cxgbe_flow_parse(struct rte_flow *flow,
 	return cxgbe_rtef_parse_actions(flow, action, e);
 }
 
+static int __cxgbe_flow_create(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+	struct ch_filter_specification *fs = &flow->fs;
+	struct adapter *adap = ethdev2adap(dev);
+	struct filter_ctx ctx;
+	unsigned int fidx;
+	int err;
+
+	if (cxgbe_get_fidx(flow, &fidx))
+		return -ENOMEM;
+	if (cxgbe_verify_fidx(flow, fidx, 0))
+		return -1;
+
+	t4_init_completion(&ctx.completion);
+	/* go create the filter */
+	err = cxgbe_set_filter(dev, fidx, fs, &ctx);
+	if (err) {
+		dev_err(adap, "Error %d while creating filter.\n", err);
+		return err;
+	}
+
+	/* Poll the FW for reply */
+	err = cxgbe_poll_for_completion(&adap->sge.fw_evtq,
+					CXGBE_FLOW_POLL_US,
+					CXGBE_FLOW_POLL_CNT,
+					&ctx.completion);
+	if (err) {
+		dev_err(adap, "Filter set operation timed out (%d)\n", err);
+		return err;
+	}
+	if (ctx.result) {
+		dev_err(adap, "Hardware error %d while creating the filter.\n",
+			ctx.result);
+		return ctx.result;
+	}
+
+	flow->fidx = fidx;
+	flow->f = &adap->tids.ftid_tab[fidx];
+
+	return 0;
+}
+
+static struct rte_flow *
+cxgbe_flow_create(struct rte_eth_dev *dev,
+		  const struct rte_flow_attr *attr,
+		  const struct rte_flow_item item[],
+		  const struct rte_flow_action action[],
+		  struct rte_flow_error *e)
+{
+	struct rte_flow *flow;
+	int ret;
+
+	flow = t4_os_alloc(sizeof(struct rte_flow));
+	if (!flow) {
+		rte_flow_error_set(e, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "Unable to allocate memory for"
+				   " filter_entry");
+		return NULL;
+	}
+
+	flow->item_parser = parseitem;
+	flow->dev = dev;
+
+	if (cxgbe_flow_parse(flow, attr, item, action, e)) {
+		t4_os_free(flow);
+		return NULL;
+	}
+
+	/* go, interact with cxgbe_filter */
+	ret = __cxgbe_flow_create(dev, flow);
+	if (ret) {
+		rte_flow_error_set(e, ret, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "Unable to create flow rule");
+		t4_os_free(flow);
+		return NULL;
+	}
+
+	return flow;
+}
+
 static int
 cxgbe_flow_validate(struct rte_eth_dev *dev,
 		    const struct rte_flow_attr *attr,
@@ -443,7 +523,7 @@  cxgbe_flow_validate(struct rte_eth_dev *dev,
 
 static const struct rte_flow_ops cxgbe_flow_ops = {
 	.validate	= cxgbe_flow_validate,
-	.create		= NULL,
+	.create		= cxgbe_flow_create,
 	.destroy	= NULL,
 	.flush		= NULL,
 	.query		= NULL,
diff --git a/drivers/net/cxgbe/cxgbe_flow.h b/drivers/net/cxgbe/cxgbe_flow.h
index 45bc37082..4456376aa 100644
--- a/drivers/net/cxgbe/cxgbe_flow.h
+++ b/drivers/net/cxgbe/cxgbe_flow.h
@@ -7,6 +7,10 @@ 
 
 #include <rte_flow_driver.h>
 #include "cxgbe_filter.h"
+#include "cxgbe.h"
+
+#define CXGBE_FLOW_POLL_US  10
+#define CXGBE_FLOW_POLL_CNT 10
 
 struct chrte_fparse {
 	int (*fptr)(const void *mask, /* currently supported mask */
diff --git a/drivers/net/cxgbe/cxgbe_main.c b/drivers/net/cxgbe/cxgbe_main.c
index 5416800de..a00e0700d 100644
--- a/drivers/net/cxgbe/cxgbe_main.c
+++ b/drivers/net/cxgbe/cxgbe_main.c
@@ -86,6 +86,10 @@  static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
 		const struct cpl_fw6_msg *msg = (const void *)rsp;
 
 		t4_handle_fw_rpl(q->adapter, msg->data);
+	} else if (opcode == CPL_SET_TCB_RPL) {
+		const struct cpl_set_tcb_rpl *p = (const void *)rsp;
+
+		filter_rpl(q->adapter, p);
 	} else {
 		dev_err(adapter, "unexpected CPL %#x on FW event queue\n",
 			opcode);
@@ -135,6 +139,38 @@  int setup_sge_ctrl_txq(struct adapter *adapter)
 	return err;
 }
 
+/**
+ * cxgbe_poll_for_completion: Poll rxq for completion
+ * @q: rxq to poll
+ * @us: microseconds to delay
+ * @cnt: number of times to poll
+ * @c: completion to check for 'done' status
+ *
+ * Polls the rxq for replies until the completion is done or the
+ * count expires.
+ */
+int cxgbe_poll_for_completion(struct sge_rspq *q, unsigned int us,
+			      unsigned int cnt, struct t4_completion *c)
+{
+	unsigned int i;
+	unsigned int work_done, budget = 4;
+
+	if (!c)
+		return -EINVAL;
+
+	for (i = 0; i < cnt; i++) {
+		cxgbe_poll(q, NULL, budget, &work_done);
+		t4_os_lock(&c->lock);
+		if (c->done) {
+			t4_os_unlock(&c->lock);
+			return 0;
+		}
+		t4_os_unlock(&c->lock);
+		udelay(us);
+	}
+	return -ETIMEDOUT;
+}
+
 int setup_sge_fwevtq(struct adapter *adapter)
 {
 	struct sge *s = &adapter->sge;