@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright(c) 2014-2016 Chelsio Communications.
+ * Copyright(c) 2014-2017 Chelsio Communications.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -459,7 +459,9 @@ static inline void t4_write_reg64(struct adapter *adapter, u32 reg_addr,
#define PCI_CAP_ID_EXP 0x10 /* PCI Express */
#define PCI_CAP_LIST_ID 0 /* Capability ID */
#define PCI_CAP_LIST_NEXT 1 /* Next capability in the list */
+#define PCI_EXP_DEVCTL 0x0008 /* Device control */
#define PCI_EXP_DEVCTL2 40 /* Device Control 2 */
+#define PCI_EXP_DEVCTL_PAYLOAD 0x00E0 /* Max payload */
#define PCI_CAP_ID_VPD 0x03 /* Vital Product Data */
#define PCI_VPD_ADDR 2 /* Address to access (15 bits!) */
#define PCI_VPD_ADDR_F 0x8000 /* Write 0, 1 indicates completion */
@@ -282,6 +282,7 @@ int t4_fw_bye(struct adapter *adap, unsigned int mbox);
int t4_fw_reset(struct adapter *adap, unsigned int mbox, int reset);
int t4_fw_halt(struct adapter *adap, unsigned int mbox, int reset);
int t4_fw_restart(struct adapter *adap, unsigned int mbox, int reset);
+int t4_fl_pkt_align(struct adapter *adap);
int t4_fixup_host_params_compat(struct adapter *adap, unsigned int page_size,
unsigned int cache_line_size,
enum chip_type chip_compat);
@@ -3358,6 +3358,49 @@ int t4_fw_restart(struct adapter *adap, unsigned int mbox, int reset)
}
/**
+ * t4_fl_pkt_align - return the fl packet alignment
+ * @adap: the adapter
+ *
+ * T4 has a single field to specify the packing and padding boundary.
+ * T5 onwards has separate fields for this and hence the alignment for
+ * next packet offset is maximum of these two.
+ */
+int t4_fl_pkt_align(struct adapter *adap)
+{
+ u32 sge_control, sge_control2;
+ unsigned int ingpadboundary, ingpackboundary, fl_align, ingpad_shift;
+
+ sge_control = t4_read_reg(adap, A_SGE_CONTROL);
+
+ /* T4 uses a single control field to specify both the PCIe Padding and
+ * Packing Boundary. T5 introduced the ability to specify these
+ * separately. The actual Ingress Packet Data alignment boundary
+ * within Packed Buffer Mode is the maximum of these two
+ * specifications.
+ */
+ if (CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5)
+ ingpad_shift = X_INGPADBOUNDARY_SHIFT;
+ else
+ ingpad_shift = X_T6_INGPADBOUNDARY_SHIFT;
+
+ ingpadboundary = 1 << (G_INGPADBOUNDARY(sge_control) + ingpad_shift);
+
+ fl_align = ingpadboundary;
+ if (!is_t4(adap->params.chip)) {
+ sge_control2 = t4_read_reg(adap, A_SGE_CONTROL2);
+ ingpackboundary = G_INGPACKBOUNDARY(sge_control2);
+ if (ingpackboundary == X_INGPACKBOUNDARY_16B)
+ ingpackboundary = 16;
+ else
+ ingpackboundary = 1 << (ingpackboundary +
+ X_INGPACKBOUNDARY_SHIFT);
+
+ fl_align = max(ingpadboundary, ingpackboundary);
+ }
+ return fl_align;
+}
+
+/**
* t4_fixup_host_params_compat - fix up host-dependent parameters
* @adap: the adapter
* @page_size: the host's Base Page Size
@@ -3402,6 +3445,10 @@ int t4_fixup_host_params_compat(struct adapter *adap,
X_INGPADBOUNDARY_SHIFT) |
V_EGRSTATUSPAGESIZE(stat_len != 64));
else {
+ unsigned int pack_align;
+ unsigned int ingpad, ingpack;
+ unsigned int pcie_cap;
+
/*
* T5 introduced the separation of the Free List Padding and
* Packing Boundaries. Thus, we can select a smaller Padding
@@ -3415,12 +3462,34 @@ int t4_fixup_host_params_compat(struct adapter *adap,
* Size (the minimum unit of transfer to/from Memory). If we
* have a Padding Boundary which is smaller than the Memory
* Line Size, that'll involve a Read-Modify-Write cycle on the
- * Memory Controller which is never good. For T5 the smallest
- * Padding Boundary which we can select is 32 bytes which is
- * larger than any known Memory Controller Line Size so we'll
- * use that.
+ * Memory Controller which is never good.
*/
+ /* We want the Packing Boundary to be based on the Cache Line
+ * Size in order to help avoid False Sharing performance
+ * issues between CPUs, etc. We also want the Packing
+ * Boundary to incorporate the PCI-E Maximum Payload Size. We
+ * get best performance when the Packing Boundary is a
+ * multiple of the Maximum Payload Size.
+ */
+ pack_align = fl_align;
+ pcie_cap = t4_os_find_pci_capability(adap, PCI_CAP_ID_EXP);
+ if (pcie_cap) {
+ unsigned int mps, mps_log;
+ u16 devctl;
+
+ /* The PCIe Device Control Maximum Payload Size field
+ * [bits 7:5] encodes sizes as powers of 2 starting at
+ * 128 bytes.
+ */
+ t4_os_pci_read_cfg2(adap, pcie_cap + PCI_EXP_DEVCTL,
+ &devctl);
+ mps_log = ((devctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5) + 7;
+ mps = 1 << mps_log;
+ if (mps > pack_align)
+ pack_align = mps;
+ }
+
/*
* N.B. T5 has a different interpretation of the "0" value for
* the Packing Boundary. This corresponds to 16 bytes instead
@@ -3429,19 +3498,36 @@ int t4_fixup_host_params_compat(struct adapter *adap,
* on the other hand, if we wanted 32 bytes, the best we can
* really do is 64 bytes ...
*/
- if (fl_align <= 32) {
+ if (pack_align <= 16) {
+ ingpack = X_INGPACKBOUNDARY_16B;
+ fl_align = 16;
+ } else if (pack_align == 32) {
+ ingpack = X_INGPACKBOUNDARY_64B;
fl_align = 64;
- fl_align_log = 6;
+ } else {
+ unsigned int pack_align_log = cxgbe_fls(pack_align) - 1;
+
+ ingpack = pack_align_log - X_INGPACKBOUNDARY_SHIFT;
+ fl_align = pack_align;
}
+
+ /* Use the smallest Ingress Padding which isn't smaller than
+ * the Memory Controller Read/Write Size. We'll take that as
+ * being 8 bytes since we don't know of any system with a
+ * wider Memory Controller Bus Width.
+ */
+ if (is_t5(adap->params.chip))
+ ingpad = X_INGPADBOUNDARY_32B;
+ else
+ ingpad = X_T6_INGPADBOUNDARY_8B;
t4_set_reg_field(adap, A_SGE_CONTROL,
V_INGPADBOUNDARY(M_INGPADBOUNDARY) |
F_EGRSTATUSPAGESIZE,
- V_INGPADBOUNDARY(X_INGPCIEBOUNDARY_32B) |
+ V_INGPADBOUNDARY(ingpad) |
V_EGRSTATUSPAGESIZE(stat_len != 64));
t4_set_reg_field(adap, A_SGE_CONTROL2,
V_INGPACKBOUNDARY(M_INGPACKBOUNDARY),
- V_INGPACKBOUNDARY(fl_align_log -
- X_INGPACKBOUNDARY_SHIFT));
+ V_INGPACKBOUNDARY(ingpack));
}
/*
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright(c) 2014-2015 Chelsio Communications.
+ * Copyright(c) 2014-2017 Chelsio Communications.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -55,10 +55,15 @@
#define X_RXPKTCPLMODE_SPLIT 1
#define X_INGPCIEBOUNDARY_32B 0
#define X_INGPADBOUNDARY_SHIFT 5
+#define X_INGPADBOUNDARY_32B 0
+
+#define X_T6_INGPADBOUNDARY_SHIFT 3
+#define X_T6_INGPADBOUNDARY_8B 0
/* CONTROL2 register */
#define X_INGPACKBOUNDARY_SHIFT 5
#define X_INGPACKBOUNDARY_16B 0
+#define X_INGPACKBOUNDARY_64B 1
/* GTS register */
#define X_TIMERREG_RESTART_COUNTER 6
@@ -420,7 +420,9 @@ static unsigned int refill_fl_usembufs(struct adapter *adap, struct sge_fl *q,
mbuf->nb_segs = 1;
mbuf->port = rxq->rspq.port_id;
- mapping = (dma_addr_t)(mbuf->buf_physaddr + mbuf->data_off);
+ mapping = (dma_addr_t)RTE_ALIGN(mbuf->buf_physaddr +
+ mbuf->data_off,
+ adap->sge.fl_align);
mapping |= buf_size_idx;
*d++ = cpu_to_be64(mapping);
set_rx_sw_desc(sd, mbuf, mapping);
@@ -1684,8 +1686,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
if (fl) {
struct sge_eth_rxq *rxq = container_of(fl, struct sge_eth_rxq,
fl);
- enum chip_type chip = (enum chip_type)CHELSIO_CHIP_VERSION(
- adap->params.chip);
+ unsigned int chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip);
/*
* Allocate the ring for the hardware free list (with space
@@ -1731,9 +1732,12 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
* Hence maximum allowed burst size will be 448 bytes.
*/
c.fl0dcaen_to_fl0cidxfthresh =
- htons(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_128B) |
- V_FW_IQ_CMD_FL0FBMAX((chip <= CHELSIO_T5) ?
- X_FETCHBURSTMAX_512B : X_FETCHBURSTMAX_256B));
+ htons(V_FW_IQ_CMD_FL0FBMIN(chip_ver <= CHELSIO_T5 ?
+ X_FETCHBURSTMIN_128B :
+ X_FETCHBURSTMIN_64B) |
+ V_FW_IQ_CMD_FL0FBMAX(chip_ver <= CHELSIO_T5 ?
+ X_FETCHBURSTMAX_512B :
+ X_FETCHBURSTMAX_256B));
c.fl0size = htons(flsz);
c.fl0addr = cpu_to_be64(fl->addr);
}
@@ -2189,8 +2193,7 @@ static int t4_sge_init_soft(struct adapter *adap)
int t4_sge_init(struct adapter *adap)
{
struct sge *s = &adap->sge;
- u32 sge_control, sge_control2, sge_conm_ctrl;
- unsigned int ingpadboundary, ingpackboundary;
+ u32 sge_control, sge_conm_ctrl;
int ret, egress_threshold;
/*
@@ -2200,34 +2203,7 @@ int t4_sge_init(struct adapter *adap)
sge_control = t4_read_reg(adap, A_SGE_CONTROL);
s->pktshift = G_PKTSHIFT(sge_control);
s->stat_len = (sge_control & F_EGRSTATUSPAGESIZE) ? 128 : 64;
-
- /*
- * T4 uses a single control field to specify both the PCIe Padding and
- * Packing Boundary. T5 introduced the ability to specify these
- * separately. The actual Ingress Packet Data alignment boundary
- * within Packed Buffer Mode is the maximum of these two
- * specifications.
- */
- ingpadboundary = 1 << (G_INGPADBOUNDARY(sge_control) +
- X_INGPADBOUNDARY_SHIFT);
- s->fl_align = ingpadboundary;
-
- if (!is_t4(adap->params.chip) && !adap->use_unpacked_mode) {
- /*
- * T5 has a weird interpretation of one of the PCIe Packing
- * Boundary values. No idea why ...
- */
- sge_control2 = t4_read_reg(adap, A_SGE_CONTROL2);
- ingpackboundary = G_INGPACKBOUNDARY(sge_control2);
- if (ingpackboundary == X_INGPACKBOUNDARY_16B)
- ingpackboundary = 16;
- else
- ingpackboundary = 1 << (ingpackboundary +
- X_INGPACKBOUNDARY_SHIFT);
-
- s->fl_align = max(ingpadboundary, ingpackboundary);
- }
-
+ s->fl_align = t4_fl_pkt_align(adap);
ret = t4_sge_init_soft(adap);
if (ret < 0) {
dev_err(adap, "%s: t4_sge_init_soft failed, error %d\n",