@@ -31,6 +31,7 @@ env:
- DEF_LIB="static" OPTS="-Denable_kmods=false"
- DEF_LIB="shared" OPTS="-Denable_kmods=false"
- DEF_LIB="shared" RUN_TESTS=1 BUILD_DOCS=1
+ - DEF_LIB="shared" OPTS="-Db_lto=true"
matrix:
include:
@@ -100,6 +101,12 @@ matrix:
apt:
packages:
- *extra_packages
+ - env: DEF_LIB="shared" OPTS="-Db_lto=true" EXTRA_PACKAGES=1
+ compiler: gcc
+ addons:
+ apt:
+ packages:
+ - *extra_packages
script: ./.ci/${TRAVIS_OS_NAME}-build.sh
@@ -49,6 +49,11 @@ CONFIG_RTE_FORCE_INTRINSICS=n
#
CONFIG_RTE_ARCH_STRICT_ALIGN=n
+#
+# Enable link time optimization
+#
+CONFIG_RTE_ENABLE_LTO=n
+
#
# Compile to share library
#
@@ -196,3 +196,16 @@ add_project_arguments('-D_GNU_SOURCE', language: 'c')
if is_freebsd
add_project_arguments('-D__BSD_VISIBLE', language: 'c')
endif
+
+if get_option('b_lto')
+ if cc.has_argument('-ffat-lto-objects')
+ add_project_arguments('-ffat-lto-objects', language: 'c')
+ else
+ error('compiler does not support fat LTO objects - please turn LTO off')
+ endif
+ # workaround for gcc bug 81440
+ if cc.get_id() == 'gcc' and cc.version().version_compare('<8.0')
+ add_project_arguments('-Wno-lto-type-mismatch', language: 'c')
+ add_project_link_arguments('-Wno-lto-type-mismatch', language: 'c')
+ endif
+endif
new file mode 100644
@@ -0,0 +1,36 @@
+Link Time Optimization
+======================
+
+The DPDK framework supports compilation with link time optimization
+turned on. This obviously depends on the ability of the compiler to do
+"whole program" optimization at link time, and is available only for
+compilers that support that feature (gcc and icc). More specifically,
+the compiler has to support the creation of ELF objects containing
+both normal code and internal representation (fat-lto-objects). This is
+required because, during the build, some code is generated by parsing
+the produced ELF objects (pmdinfogen).
+
+The amount of performance gain that one can get from LTO depends on the
+compiler and the code that is being compiled. However, LTO is also
+useful for the additional code analysis done by the compiler. In
+particular, due to interprocedural analysis, the compiler can produce
+additional warnings about variables that might be used uninitialized.
+Some of these warnings might be "false positives" though, and you might
+need to explicitly initialize a variable in order to silence the compiler.
+
+Link time optimization can be enabled for the whole DPDK framework by
+setting:
+
+.. code-block:: console
+ CONFIG_RTE_ENABLE_LTO=y
+
+in config file for the case of make based build and by:
+
+.. code-block:: console
+ meson build -Db_lto=true
+ ninja -C build
+
+for the case of meson based build.
+
+Please note that turning LTO on considerably increases
+compilation time.
@@ -56,6 +56,14 @@ New Features
Also, make sure to start the actual text at the margin.
=========================================================
+**Added build support for Link Time Optimization.**
+
+ LTO is an optimization technique used by the compiler to perform whole
+ program analysis and optimization at link time. In order to do that,
+ compilers store their internal representation of the source code, which
+ the linker uses at the final stage of the compilation process.
+
+ See :doc:`../prog_guide/lto` for more information.
Removed Items
-------------
@@ -32,7 +32,7 @@ EAL_REGISTER_TAILQ(rte_dist_burst_tailq)
/**** Burst Packet APIs called by workers ****/
-void
+void __vsym
rte_distributor_request_pkt_v1705(struct rte_distributor *d,
unsigned int worker_id, struct rte_mbuf **oldpkt,
unsigned int count)
@@ -84,7 +84,7 @@ MAP_STATIC_SYMBOL(void rte_distributor_request_pkt(struct rte_distributor *d,
unsigned int count),
rte_distributor_request_pkt_v1705);
-int
+int __vsym
rte_distributor_poll_pkt_v1705(struct rte_distributor *d,
unsigned int worker_id, struct rte_mbuf **pkts)
{
@@ -124,7 +124,7 @@ MAP_STATIC_SYMBOL(int rte_distributor_poll_pkt(struct rte_distributor *d,
unsigned int worker_id, struct rte_mbuf **pkts),
rte_distributor_poll_pkt_v1705);
-int
+int __vsym
rte_distributor_get_pkt_v1705(struct rte_distributor *d,
unsigned int worker_id, struct rte_mbuf **pkts,
struct rte_mbuf **oldpkt, unsigned int return_count)
@@ -159,7 +159,7 @@ MAP_STATIC_SYMBOL(int rte_distributor_get_pkt(struct rte_distributor *d,
struct rte_mbuf **oldpkt, unsigned int return_count),
rte_distributor_get_pkt_v1705);
-int
+int __vsym
rte_distributor_return_pkt_v1705(struct rte_distributor *d,
unsigned int worker_id, struct rte_mbuf **oldpkt, int num)
{
@@ -335,7 +335,7 @@ release(struct rte_distributor *d, unsigned int wkr)
/* process a set of packets to distribute them to workers */
-int
+int __vsym
rte_distributor_process_v1705(struct rte_distributor *d,
struct rte_mbuf **mbufs, unsigned int num_mbufs)
{
@@ -476,7 +476,7 @@ MAP_STATIC_SYMBOL(int rte_distributor_process(struct rte_distributor *d,
rte_distributor_process_v1705);
/* return to the caller, packets returned from workers */
-int
+int __vsym
rte_distributor_returned_pkts_v1705(struct rte_distributor *d,
struct rte_mbuf **mbufs, unsigned int max_mbufs)
{
@@ -526,7 +526,7 @@ total_outstanding(const struct rte_distributor *d)
* Flush the distributor, so that there are no outstanding packets in flight or
* queued up.
*/
-int
+int __vsym
rte_distributor_flush_v1705(struct rte_distributor *d)
{
unsigned int flushed;
@@ -561,7 +561,7 @@ MAP_STATIC_SYMBOL(int rte_distributor_flush(struct rte_distributor *d),
rte_distributor_flush_v1705);
/* clears the internal returns array in the distributor */
-void
+void __vsym
rte_distributor_clear_returns_v1705(struct rte_distributor *d)
{
unsigned int wkr;
@@ -581,7 +581,7 @@ MAP_STATIC_SYMBOL(void rte_distributor_clear_returns(struct rte_distributor *d),
rte_distributor_clear_returns_v1705);
/* creates a distributor instance */
-struct rte_distributor *
+struct rte_distributor * __vsym
rte_distributor_create_v1705(const char *name,
unsigned int socket_id,
unsigned int num_workers,
@@ -27,7 +27,7 @@ EAL_REGISTER_TAILQ(rte_distributor_tailq)
/**** APIs called by workers ****/
-void
+void __vsym
rte_distributor_request_pkt_v20(struct rte_distributor_v20 *d,
unsigned worker_id, struct rte_mbuf *oldpkt)
{
@@ -40,7 +40,7 @@ rte_distributor_request_pkt_v20(struct rte_distributor_v20 *d,
}
VERSION_SYMBOL(rte_distributor_request_pkt, _v20, 2.0);
-struct rte_mbuf *
+struct rte_mbuf * __vsym
rte_distributor_poll_pkt_v20(struct rte_distributor_v20 *d,
unsigned worker_id)
{
@@ -54,7 +54,7 @@ rte_distributor_poll_pkt_v20(struct rte_distributor_v20 *d,
}
VERSION_SYMBOL(rte_distributor_poll_pkt, _v20, 2.0);
-struct rte_mbuf *
+struct rte_mbuf * __vsym
rte_distributor_get_pkt_v20(struct rte_distributor_v20 *d,
unsigned worker_id, struct rte_mbuf *oldpkt)
{
@@ -66,7 +66,7 @@ rte_distributor_get_pkt_v20(struct rte_distributor_v20 *d,
}
VERSION_SYMBOL(rte_distributor_get_pkt, _v20, 2.0);
-int
+int __vsym
rte_distributor_return_pkt_v20(struct rte_distributor_v20 *d,
unsigned worker_id, struct rte_mbuf *oldpkt)
{
@@ -191,7 +191,7 @@ process_returns(struct rte_distributor_v20 *d)
}
/* process a set of packets to distribute them to workers */
-int
+int __vsym
rte_distributor_process_v20(struct rte_distributor_v20 *d,
struct rte_mbuf **mbufs, unsigned num_mbufs)
{
@@ -296,7 +296,7 @@ rte_distributor_process_v20(struct rte_distributor_v20 *d,
VERSION_SYMBOL(rte_distributor_process, _v20, 2.0);
/* return to the caller, packets returned from workers */
-int
+int __vsym
rte_distributor_returned_pkts_v20(struct rte_distributor_v20 *d,
struct rte_mbuf **mbufs, unsigned max_mbufs)
{
@@ -334,7 +334,7 @@ total_outstanding(const struct rte_distributor_v20 *d)
/* flush the distributor, so that there are no outstanding packets in flight or
* queued up. */
-int
+int __vsym
rte_distributor_flush_v20(struct rte_distributor_v20 *d)
{
const unsigned flushed = total_outstanding(d);
@@ -347,7 +347,7 @@ rte_distributor_flush_v20(struct rte_distributor_v20 *d)
VERSION_SYMBOL(rte_distributor_flush, _v20, 2.0);
/* clears the internal returns array in the distributor */
-void
+void __vsym
rte_distributor_clear_returns_v20(struct rte_distributor_v20 *d)
{
d->returns.start = d->returns.count = 0;
@@ -358,7 +358,7 @@ rte_distributor_clear_returns_v20(struct rte_distributor_v20 *d)
VERSION_SYMBOL(rte_distributor_clear_returns, _v20, 2.0);
/* creates a distributor instance */
-struct rte_distributor_v20 *
+struct rte_distributor_v20 * __vsym
rte_distributor_create_v20(const char *name,
unsigned socket_id,
unsigned num_workers)
@@ -89,7 +89,7 @@ depth_to_range(uint8_t depth)
/*
* Find an existing lpm table and return a pointer to it.
*/
-struct rte_lpm_v20 *
+struct rte_lpm_v20 * __vsym
rte_lpm_find_existing_v20(const char *name)
{
struct rte_lpm_v20 *l = NULL;
@@ -115,7 +115,7 @@ rte_lpm_find_existing_v20(const char *name)
}
VERSION_SYMBOL(rte_lpm_find_existing, _v20, 2.0);
-struct rte_lpm *
+struct rte_lpm * __vsym
rte_lpm_find_existing_v1604(const char *name)
{
struct rte_lpm *l = NULL;
@@ -146,7 +146,7 @@ MAP_STATIC_SYMBOL(struct rte_lpm *rte_lpm_find_existing(const char *name),
/*
* Allocates memory for LPM object
*/
-struct rte_lpm_v20 *
+struct rte_lpm_v20 * __vsym
rte_lpm_create_v20(const char *name, int socket_id, int max_rules,
__rte_unused int flags)
{
@@ -219,7 +219,7 @@ rte_lpm_create_v20(const char *name, int socket_id, int max_rules,
}
VERSION_SYMBOL(rte_lpm_create, _v20, 2.0);
-struct rte_lpm *
+struct rte_lpm * __vsym
rte_lpm_create_v1604(const char *name, int socket_id,
const struct rte_lpm_config *config)
{
@@ -328,7 +328,7 @@ MAP_STATIC_SYMBOL(
/*
* Deallocates memory for given LPM table.
*/
-void
+void __vsym
rte_lpm_free_v20(struct rte_lpm_v20 *lpm)
{
struct rte_lpm_list *lpm_list;
@@ -357,7 +357,7 @@ rte_lpm_free_v20(struct rte_lpm_v20 *lpm)
}
VERSION_SYMBOL(rte_lpm_free, _v20, 2.0);
-void
+void __vsym
rte_lpm_free_v1604(struct rte_lpm *lpm)
{
struct rte_lpm_list *lpm_list;
@@ -1176,7 +1176,7 @@ add_depth_big_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
/*
* Add a route
*/
-int
+int __vsym
rte_lpm_add_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
uint8_t next_hop)
{
@@ -1217,7 +1217,7 @@ rte_lpm_add_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
}
VERSION_SYMBOL(rte_lpm_add, _v20, 2.0);
-int
+int __vsym
rte_lpm_add_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
uint32_t next_hop)
{
@@ -1263,7 +1263,7 @@ MAP_STATIC_SYMBOL(int rte_lpm_add(struct rte_lpm *lpm, uint32_t ip,
/*
* Look for a rule in the high-level rules table
*/
-int
+int __vsym
rte_lpm_is_rule_present_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
uint8_t *next_hop)
{
@@ -1290,7 +1290,7 @@ uint8_t *next_hop)
}
VERSION_SYMBOL(rte_lpm_is_rule_present, _v20, 2.0);
-int
+int __vsym
rte_lpm_is_rule_present_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
uint32_t *next_hop)
{
@@ -1843,7 +1843,7 @@ delete_depth_big_v1604(struct rte_lpm *lpm, uint32_t ip_masked,
/*
* Deletes a rule
*/
-int
+int __vsym
rte_lpm_delete_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth)
{
int32_t rule_to_delete_index, sub_rule_index;
@@ -1897,7 +1897,7 @@ rte_lpm_delete_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth)
}
VERSION_SYMBOL(rte_lpm_delete, _v20, 2.0);
-int
+int __vsym
rte_lpm_delete_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth)
{
int32_t rule_to_delete_index, sub_rule_index;
@@ -1956,7 +1956,7 @@ MAP_STATIC_SYMBOL(int rte_lpm_delete(struct rte_lpm *lpm, uint32_t ip,
/*
* Delete all rules from the LPM table.
*/
-void
+void __vsym
rte_lpm_delete_all_v20(struct rte_lpm_v20 *lpm)
{
/* Zero rule information. */
@@ -1973,7 +1973,7 @@ rte_lpm_delete_all_v20(struct rte_lpm_v20 *lpm)
}
VERSION_SYMBOL(rte_lpm_delete_all, _v20, 2.0);
-void
+void __vsym
rte_lpm_delete_all_v1604(struct rte_lpm *lpm)
{
/* Zero rule information. */
@@ -811,7 +811,7 @@ add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl,
/*
* Add a route
*/
-int
+int __vsym
rte_lpm6_add_v20(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
uint8_t next_hop)
{
@@ -861,7 +861,7 @@ simulate_add(struct rte_lpm6 *lpm, const uint8_t *masked_ip, uint8_t depth)
return 0;
}
-int
+int __vsym
rte_lpm6_add_v1705(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
uint32_t next_hop)
{
@@ -954,7 +954,7 @@ lookup_step(const struct rte_lpm6 *lpm, const struct rte_lpm6_tbl_entry *tbl,
/*
* Looks up an IP
*/
-int
+int __vsym
rte_lpm6_lookup_v20(const struct rte_lpm6 *lpm, uint8_t *ip, uint8_t *next_hop)
{
uint32_t next_hop32 = 0;
@@ -972,7 +972,7 @@ rte_lpm6_lookup_v20(const struct rte_lpm6 *lpm, uint8_t *ip, uint8_t *next_hop)
}
VERSION_SYMBOL(rte_lpm6_lookup, _v20, 2.0);
-int
+int __vsym
rte_lpm6_lookup_v1705(const struct rte_lpm6 *lpm, uint8_t *ip,
uint32_t *next_hop)
{
@@ -1007,7 +1007,7 @@ MAP_STATIC_SYMBOL(int rte_lpm6_lookup(const struct rte_lpm6 *lpm, uint8_t *ip,
/*
* Looks up a group of IP addresses
*/
-int
+int __vsym
rte_lpm6_lookup_bulk_func_v20(const struct rte_lpm6 *lpm,
uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE],
int16_t * next_hops, unsigned n)
@@ -1048,7 +1048,7 @@ rte_lpm6_lookup_bulk_func_v20(const struct rte_lpm6 *lpm,
}
VERSION_SYMBOL(rte_lpm6_lookup_bulk_func, _v20, 2.0);
-int
+int __vsym
rte_lpm6_lookup_bulk_func_v1705(const struct rte_lpm6 *lpm,
uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE],
int32_t *next_hops, unsigned int n)
@@ -1098,7 +1098,7 @@ MAP_STATIC_SYMBOL(int rte_lpm6_lookup_bulk_func(const struct rte_lpm6 *lpm,
/*
* Look for a rule in the high-level rules table
*/
-int
+int __vsym
rte_lpm6_is_rule_present_v20(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
uint8_t *next_hop)
{
@@ -1118,7 +1118,7 @@ rte_lpm6_is_rule_present_v20(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
}
VERSION_SYMBOL(rte_lpm6_is_rule_present, _v20, 2.0);
-int
+int __vsym
rte_lpm6_is_rule_present_v1705(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
uint32_t *next_hop)
{
@@ -131,7 +131,7 @@ rte_timer_data_dealloc(uint32_t id)
return 0;
}
-void
+void __vsym
rte_timer_subsystem_init_v20(void)
{
unsigned lcore_id;
@@ -153,7 +153,7 @@ VERSION_SYMBOL(rte_timer_subsystem_init, _v20, 2.0);
* secondary processes should be empty, the zeroth entry can be shared by
* multiple processes.
*/
-int
+int __vsym
rte_timer_subsystem_init_v1905(void)
{
const struct rte_memzone *mz;
@@ -551,7 +551,7 @@ __rte_timer_reset(struct rte_timer *tim, uint64_t expire,
}
/* Reset and start the timer associated with the timer handle tim */
-int
+int __vsym
rte_timer_reset_v20(struct rte_timer *tim, uint64_t ticks,
enum rte_timer_type type, unsigned int tim_lcore,
rte_timer_cb_t fct, void *arg)
@@ -574,7 +574,7 @@ rte_timer_reset_v20(struct rte_timer *tim, uint64_t ticks,
}
VERSION_SYMBOL(rte_timer_reset, _v20, 2.0);
-int
+int __vsym
rte_timer_reset_v1905(struct rte_timer *tim, uint64_t ticks,
enum rte_timer_type type, unsigned int tim_lcore,
rte_timer_cb_t fct, void *arg)
@@ -657,14 +657,14 @@ __rte_timer_stop(struct rte_timer *tim, int local_is_locked,
}
/* Stop the timer associated with the timer handle tim */
-int
+int __vsym
rte_timer_stop_v20(struct rte_timer *tim)
{
return __rte_timer_stop(tim, 0, &default_timer_data);
}
VERSION_SYMBOL(rte_timer_stop, _v20, 2.0);
-int
+int __vsym
rte_timer_stop_v1905(struct rte_timer *tim)
{
return rte_timer_alt_stop(default_data_id, tim);
@@ -817,14 +817,14 @@ __rte_timer_manage(struct rte_timer_data *timer_data)
priv_timer[lcore_id].running_tim = NULL;
}
-void
+void __vsym
rte_timer_manage_v20(void)
{
__rte_timer_manage(&default_timer_data);
}
VERSION_SYMBOL(rte_timer_manage, _v20, 2.0);
-int
+int __vsym
rte_timer_manage_v1905(void)
{
struct rte_timer_data *timer_data;
@@ -1074,14 +1074,14 @@ __rte_timer_dump_stats(struct rte_timer_data *timer_data __rte_unused, FILE *f)
#endif
}
-void
+void __vsym
rte_timer_dump_stats_v20(FILE *f)
{
__rte_timer_dump_stats(&default_timer_data, f);
}
VERSION_SYMBOL(rte_timer_dump_stats, _v20, 2.0);
-int
+int __vsym
rte_timer_dump_stats_v1905(FILE *f)
{
return rte_timer_alt_dump_stats(default_data_id, f);
@@ -88,6 +88,10 @@ else
MACHINE_CFLAGS := $(filter-out -march% -mtune% -msse%,$(MACHINE_CFLAGS))
endif
+ ifeq ($(shell test $(GCC_VERSION) -lt 45 && echo 1), 1)
+ CONFIG_RTE_ENABLE_LTO=n
+ endif
+
# Disable thunderx PMD for gcc < 4.7
ifeq ($(shell test $(GCC_VERSION) -lt 47 && echo 1), 1)
CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD=d
@@ -62,6 +62,18 @@ endif
# process cpu flags
include $(RTE_SDK)/mk/toolchain/$(RTE_TOOLCHAIN)/rte.toolchain-compat.mk
+ifeq ($(CONFIG_RTE_ENABLE_LTO),y)
+# '-ffat-lto-objects' is used since pmdinfogen needs to have
+# 'this_pmd_nameX' exported in the symbol table; without this option
+# only the internal representation is present.
+TOOLCHAIN_CFLAGS += -flto -ffat-lto-objects
+TOOLCHAIN_LDFLAGS += -flto
+# workaround for GCC bug 81440
+ifeq ($(shell test $(GCC_VERSION) -lt 80 && echo 1), 1)
+WERROR_FLAGS += -Wno-lto-type-mismatch
+endif
+endif
+
# workaround GCC bug with warning "missing initializer" for "= {0}"
ifeq ($(shell test $(GCC_VERSION) -lt 47 && echo 1), 1)
WERROR_FLAGS += -Wno-missing-field-initializers
@@ -54,5 +54,13 @@ endif
# process cpu flags
include $(RTE_SDK)/mk/toolchain/$(RTE_TOOLCHAIN)/rte.toolchain-compat.mk
+ifeq ($(CONFIG_RTE_ENABLE_LTO),y)
+# '-ffat-lto-objects' is used since pmdinfogen needs to have
+# 'this_pmd_nameX' exported in the symbol table; without this option
+# only the internal representation is present.
+TOOLCHAIN_CFLAGS += -flto -ffat-lto-objects
+TOOLCHAIN_LDFLAGS += -flto
+endif
+
export CC AS AR LD OBJCOPY OBJDUMP STRIP READELF
export TOOLCHAIN_CFLAGS TOOLCHAIN_LDFLAGS TOOLCHAIN_ASFLAGS