diff -ur syslog-ng-syslog-ng-4.2.0.orig/CMakeLists.txt syslog-ng-syslog-ng-4.2.0/CMakeLists.txt --- syslog-ng-syslog-ng-4.2.0.orig/CMakeLists.txt 2023-05-10 16:55:07.000000000 +0200 +++ syslog-ng-syslog-ng-4.2.0/CMakeLists.txt 2023-07-19 02:12:21.375815517 +0200 @@ -272,7 +272,7 @@ include(openssl_functions) openssl_set_defines() -pkg_check_modules(LIBPCRE REQUIRED libpcre) +pkg_check_modules(LIBPCRE REQUIRED libpcre2-8) if (WRAP_FOUND) set(SYSLOG_NG_ENABLE_TCP_WRAPPER 1) diff -ur syslog-ng-syslog-ng-4.2.0.orig/configure.ac syslog-ng-syslog-ng-4.2.0/configure.ac --- syslog-ng-syslog-ng-4.2.0.orig/configure.ac 2023-05-10 16:55:07.000000000 +0200 +++ syslog-ng-syslog-ng-4.2.0/configure.ac 2023-07-19 02:12:21.375815517 +0200 @@ -48,7 +48,7 @@ IVYKIS_MIN_VERSION="0.36.1" IVYKIS_UPDATED_VERSION="0.39" JSON_C_MIN_VERSION="0.9" -PCRE_MIN_VERSION="6.1" +PCRE2_MIN_VERSION="10.0" LMC_MIN_VERSION="1.0.0" LRMQ_MIN_VERSION="0.0.1" LRC_MIN_VERSION="1.6.0" @@ -1046,9 +1046,10 @@ LIBS="$old_LIBS" fi -PKG_CHECK_MODULES(PCRE, libpcre >= $PCRE_MIN_VERSION,, PCRE_LIBS="") -if test -z "$PCRE_LIBS"; then - AC_MSG_ERROR(Cannot find pcre version >= $PCRE_MIN_VERSION it is a hard dependency from syslog-ng 3.6 onwards) +PKG_CHECK_MODULES(PCRE2, libpcre2-8 >= $PCRE2_MIN_VERSION,, PCRE2_LIBS="") + +if test -z "$PCRE2_LIBS"; then + AC_MSG_ERROR(Cannot find pcre2 version >= $PCRE2_MIN_VERSION which is a hard dependency from syslog-ng 3.6 onwards) fi dnl *************************************************************************** @@ -1846,7 +1847,7 @@ python_moduledir="$moduledir"/python python_sysconf_moduledir="${sysconfdir}/python" -CPPFLAGS="$CPPFLAGS $GLIB_CFLAGS $EVTLOG_CFLAGS $PCRE_CFLAGS $OPENSSL_CFLAGS $LIBNET_CFLAGS $LIBDBI_CFLAGS $IVYKIS_CFLAGS $LIBCAP_CFLAGS -D_GNU_SOURCE -D_DEFAULT_SOURCE -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64" +CPPFLAGS="$CPPFLAGS $GLIB_CFLAGS $EVTLOG_CFLAGS $PCRE2_CFLAGS $OPENSSL_CFLAGS $LIBNET_CFLAGS $LIBDBI_CFLAGS $IVYKIS_CFLAGS $LIBCAP_CFLAGS 
-D_GNU_SOURCE -D_DEFAULT_SOURCE -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64" ######################################################## ## NOTES: on how syslog-ng is linked @@ -1894,7 +1895,7 @@ MODULE_DEPS_LIBS="\$(top_builddir)/lib/libsyslog-ng.la" if test "x$linking_mode" = "xdynamic"; then - SYSLOGNG_DEPS_LIBS="$LIBS $BASE_LIBS $GLIB_LIBS $EVTLOG_LIBS $SECRETSTORAGE_LIBS $RESOLV_LIBS $LIBCAP_LIBS $PCRE_LIBS $REGEX_LIBS $DL_LIBS" + SYSLOGNG_DEPS_LIBS="$LIBS $BASE_LIBS $GLIB_LIBS $EVTLOG_LIBS $SECRETSTORAGE_LIBS $RESOLV_LIBS $LIBCAP_LIBS $PCRE2_LIBS $REGEX_LIBS $DL_LIBS" if test "x$with_ivykis" = "xinternal"; then # when using the internal ivykis, we're linking it statically into libsyslog-ng.so @@ -1913,8 +1914,8 @@ # syslog-ng binary is linked with the default link command (e.g. libtool) SYSLOGNG_LINK='$(LINK)' else - SYSLOGNG_DEPS_LIBS="$LIBS $BASE_LIBS $RESOLV_LIBS $EVTLOG_NO_LIBTOOL_LIBS $SECRETSTORAGE_NO_LIBTOOL_LIBS $LD_START_STATIC -Wl,${WHOLE_ARCHIVE_OPT} $GLIB_LIBS $PCRE_LIBS $REGEX_LIBS -Wl,${NO_WHOLE_ARCHIVE_OPT} $IVYKIS_NO_LIBTOOL_LIBS $LD_END_STATIC $LIBCAP_LIBS $DL_LIBS" - TOOL_DEPS_LIBS="$LIBS $BASE_LIBS $GLIB_LIBS $EVTLOG_LIBS $SECRETSTORAGE_LIBS $RESOLV_LIBS $LIBCAP_LIBS $PCRE_LIBS $REGEX_LIBS $IVYKIS_LIBS $DL_LIBS" + SYSLOGNG_DEPS_LIBS="$LIBS $BASE_LIBS $RESOLV_LIBS $EVTLOG_NO_LIBTOOL_LIBS $SECRETSTORAGE_NO_LIBTOOL_LIBS $LD_START_STATIC -Wl,${WHOLE_ARCHIVE_OPT} $GLIB_LIBS $PCRE2_LIBS $REGEX_LIBS -Wl,${NO_WHOLE_ARCHIVE_OPT} $IVYKIS_NO_LIBTOOL_LIBS $LD_END_STATIC $LIBCAP_LIBS $DL_LIBS" + TOOL_DEPS_LIBS="$LIBS $BASE_LIBS $GLIB_LIBS $EVTLOG_LIBS $SECRETSTORAGE_LIBS $RESOLV_LIBS $LIBCAP_LIBS $PCRE2_LIBS $REGEX_LIBS $IVYKIS_LIBS $DL_LIBS" CORE_DEPS_LIBS="" # bypass libtool in case we want to do mixed linking because it diff -ur syslog-ng-syslog-ng-4.2.0.orig/lib/compat/pcre.h syslog-ng-syslog-ng-4.2.0/lib/compat/pcre.h --- syslog-ng-syslog-ng-4.2.0.orig/lib/compat/pcre.h 2023-05-10 16:55:07.000000000 +0200 +++ syslog-ng-syslog-ng-4.2.0/lib/compat/pcre.h 
2023-07-19 02:12:21.376815531 +0200 @@ -26,18 +26,8 @@ #define COMPAT_PCRE_H_INCLUDED #include "compat/compat.h" -#include <pcre.h> -#ifndef PCRE_CONFIG_JIT -#define pcre_free_study pcre_free -#endif - -#ifndef PCRE_STUDY_JIT_COMPILE -#define PCRE_STUDY_JIT_COMPILE 0 -#endif - -#ifndef PCRE_NEWLINE_ANYCRLF -#define PCRE_NEWLINE_ANYCRLF 0 -#endif +#define PCRE2_CODE_UNIT_WIDTH 8 +#include <pcre2.h> #endif /* COMPAT_PCRE_H_INCLUDED */ diff -ur syslog-ng-syslog-ng-4.2.0.orig/lib/filter/tests/test_filters_regexp.c syslog-ng-syslog-ng-4.2.0/lib/filter/tests/test_filters_regexp.c --- syslog-ng-syslog-ng-4.2.0.orig/lib/filter/tests/test_filters_regexp.c 2023-05-10 16:55:07.000000000 +0200 +++ syslog-ng-syslog-ng-4.2.0/lib/filter/tests/test_filters_regexp.c 2023-07-19 02:12:21.376815531 +0200 @@ -53,12 +53,6 @@ const gchar *value; } FilterParamRegexp; -static gboolean -check_pcre_version_is_atleast(const gchar *version) -{ - return strncmp(pcre_version(), version, strlen(version)) >= 0; -} - Test(filter, create_pcre_regexp_filter) { cr_assert_eq(create_pcre_regexp_filter(LM_V_PROGRAM, "((", 0), NULL); @@ -70,8 +64,7 @@ cr_assert_eq(create_pcre_regexp_filter(LM_V_HOST, "(?iana", 0), NULL); cr_assert_eq(create_pcre_regexp_match("((", 0), NULL); cr_assert_eq(create_pcre_regexp_match("(?Pa)", 0), NULL); // Begins with a digit + cr_assert_eq(create_pcre_regexp_match("(?P<1>a)", 0), NULL); // Begins with a digit cr_assert_eq(create_pcre_regexp_match("(?Pa)", 0), NULL); // Begins with an illegal char cr_assert_eq(create_pcre_regexp_match("(?Pa)", 0), NULL); // Ends with an illegal char cr_assert_eq(create_pcre_regexp_match("\\1", 0), NULL); // Backreference diff -ur syslog-ng-syslog-ng-4.2.0.orig/lib/logmatcher.c syslog-ng-syslog-ng-4.2.0/lib/logmatcher.c --- syslog-ng-syslog-ng-4.2.0.orig/lib/logmatcher.c 2023-05-10 16:55:07.000000000 +0200 +++ syslog-ng-syslog-ng-4.2.0/lib/logmatcher.c 2023-07-19 02:12:21.376815531 +0200 @@ -281,87 +281,82 @@ typedef struct _LogMatcherPcreRe { LogMatcher super; - 
pcre *pattern; - pcre_extra *extra; + pcre2_code *pattern; gint match_options; gchar *nv_prefix; gint nv_prefix_len; } LogMatcherPcreRe; static gboolean -_compile_pcre_regexp(LogMatcherPcreRe *self, const gchar *re, GError **error) +_compile_pcre2_regexp(LogMatcherPcreRe *self, const gchar *re, GError **error) { gint rc; - const gchar *errptr; - gint erroffset; gint flags = 0; g_return_val_if_fail(error == NULL || *error == NULL, FALSE); if (self->super.flags & LMF_ICASE) - flags |= PCRE_CASELESS; + flags |= PCRE2_CASELESS; if (self->super.flags & LMF_NEWLINE) { - if (!PCRE_NEWLINE_ANYCRLF) + if (!PCRE2_NEWLINE_ANYCRLF) msg_warning("syslog-ng was compiled against an old PCRE which doesn't support the 'newline' flag"); - flags |= PCRE_NEWLINE_ANYCRLF; + flags |= PCRE2_NEWLINE_ANYCRLF; } if (self->super.flags & LMF_UTF8) { gint support; - flags |= PCRE_UTF8 | PCRE_NO_UTF8_CHECK; - self->match_options |= PCRE_NO_UTF8_CHECK; + flags |= PCRE2_UTF | PCRE2_NO_UTF_CHECK; + self->match_options |= PCRE2_NO_UTF_CHECK; - pcre_config(PCRE_CONFIG_UTF8, &support); + pcre2_config(PCRE2_CONFIG_UNICODE, &support); if (!support) { - g_set_error(error, LOG_TEMPLATE_ERROR, 0, "PCRE library is compiled without UTF8 support and utf8 flag was present"); + g_set_error(error, LOG_TEMPLATE_ERROR, 0, "PCRE library is compiled without unicode support and utf8 flag was present"); return FALSE; } - pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &support); - if (!support) - { - g_set_error(error, LOG_TEMPLATE_ERROR, 0, - "PCRE library is compiled without UTF8 properties support and utf8 flag was present"); - return FALSE; - } } if (self->super.flags & LMF_DUPNAMES) { - if (!PCRE_DUPNAMES) + if (!PCRE2_DUPNAMES) msg_warning("syslog-ng was compiled against an old PCRE which doesn't support the 'dupnames' flag"); - flags |= PCRE_DUPNAMES; + flags |= PCRE2_DUPNAMES; } /* compile the regexp */ - self->pattern = pcre_compile2(re, flags, &rc, &errptr, &erroffset, NULL); + PCRE2_SIZE error_offset; + + 
self->pattern = pcre2_compile((PCRE2_SPTR) re, PCRE2_ZERO_TERMINATED, flags, &rc, &error_offset, NULL); if (!self->pattern) { + PCRE2_UCHAR error_message[128]; + + pcre2_get_error_message(rc, error_message, sizeof(error_message)); g_set_error(error, LOG_TEMPLATE_ERROR, 0, "Failed to compile PCRE expression >>>%s<<< `%s' at character %d", - re, errptr, erroffset); + re, error_message, (gint) error_offset); return FALSE; } return TRUE; } static gboolean -_study_pcre_regexp(LogMatcherPcreRe *self, const gchar *re, GError **error) +_jit_pcre2_regexp(LogMatcherPcreRe *self, const gchar *re, GError **error) { - const gchar *errptr; - gint options = 0; - - if ((self->super.flags & LMF_DISABLE_JIT) == 0) - options |= PCRE_STUDY_JIT_COMPILE; + if ((self->super.flags & LMF_DISABLE_JIT)) + return TRUE; /* optimize regexp */ - self->extra = pcre_study(self->pattern, options, &errptr); - if (errptr != NULL) + gint rc = pcre2_jit_compile(self->pattern, PCRE2_JIT_COMPLETE); + if (rc < 0) { - g_set_error(error, LOG_TEMPLATE_ERROR, 0, "Failed to optimize regular expression >>>%s<<< `%s'", - re, errptr); - return FALSE; + PCRE2_UCHAR error_message[128]; + + pcre2_get_error_message(rc, error_message, sizeof(error_message)); + msg_warning("Failed to JIT compile regular expression, you might want to use flags(disable-jit)", + evt_tag_str("regexp", re), + evt_tag_str("error", (gchar *) error_message)); } return TRUE; } @@ -374,10 +369,10 @@ g_return_val_if_fail(error == NULL || *error == NULL, FALSE); log_matcher_store_pattern(s, re); - if (!_compile_pcre_regexp(self, re, error)) + if (!_compile_pcre2_regexp(self, re, error)) return FALSE; - if (!_study_pcre_regexp(self, re, error)) + if (!_jit_pcre2_regexp(self, re, error)) return FALSE; return TRUE; @@ -388,8 +383,7 @@ NVHandle source_handle; const gchar *source_value; gssize source_value_len; - gint *matches; - gint num_matches; + pcre2_match_data *match_data; } LogMatcherPcreMatchResult; static inline void @@ -434,11 +428,13 @@ 
log_matcher_pcre_re_feed_backrefs(LogMatcherPcreRe *self, LogMessage *msg, LogMatcherPcreMatchResult *result) { gint i; + guint32 num_matches = pcre2_get_ovector_count(result->match_data); + PCRE2_SIZE *matches = pcre2_get_ovector_pointer(result->match_data); - for (i = 0; i < (LOGMSG_MAX_MATCHES) && i < result->num_matches; i++) + for (i = 0; i < (LOGMSG_MAX_MATCHES) && i < num_matches; i++) { - gint begin_index = result->matches[2 * i]; - gint end_index = result->matches[2 * i + 1]; + gint begin_index = matches[2 * i]; + gint end_index = matches[2 * i + 1]; if (begin_index < 0 || end_index < 0) continue; @@ -446,11 +442,11 @@ log_matcher_pcre_re_feed_value(self, msg, log_msg_get_match_handle(i), result, begin_index, end_index); } if (log_msg_is_handle_match(result->source_handle) && - log_msg_get_match_index(result->source_handle) >= result->num_matches) + log_msg_get_match_index(result->source_handle) >= num_matches) { log_matcher_pcre_re_save_source_value_to_avoid_clobbering(result); } - log_msg_truncate_matches(msg, result->num_matches); + log_msg_truncate_matches(msg, num_matches); } static void @@ -458,18 +454,20 @@ { gchar *name_table = NULL; gint i = 0; - gint namecount = 0; - gint name_entry_size = 0; + guint32 namecount = 0; + guint32 name_entry_size = 0; - pcre_fullinfo(self->pattern, self->extra, PCRE_INFO_NAMECOUNT, &namecount); + pcre2_pattern_info(self->pattern, PCRE2_INFO_NAMECOUNT, &namecount); if (namecount > 0) { + PCRE2_SIZE *matches = pcre2_get_ovector_pointer(result->match_data); + gchar *tabptr; /* Before we can access the substrings, we must extract the table for translating names to numbers, and the size of each entry in the table. 
*/ - pcre_fullinfo(self->pattern, self->extra, PCRE_INFO_NAMETABLE, &name_table); - pcre_fullinfo(self->pattern, self->extra, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size); + pcre2_pattern_info(self->pattern, PCRE2_INFO_NAMETABLE, &name_table); + pcre2_pattern_info(self->pattern, PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size); /* Now we can scan the table and, for each entry, print the number, the name, and the substring itself. */ @@ -480,8 +478,8 @@ for (i = 0; i < namecount; i++, tabptr += name_entry_size) { int n = (tabptr[0] << 8) | tabptr[1]; - gint begin_index = result->matches[2 * n]; - gint end_index = result->matches[2 * n + 1]; + gint begin_index = matches[2 * n]; + gint end_index = matches[2 * n + 1]; const gchar *namedgroup_name = tabptr + 2; if (begin_index < 0 || end_index < 0) @@ -502,30 +500,28 @@ LogMatcherPcreRe *self = (LogMatcherPcreRe *) s; LogMatcherPcreMatchResult result; gint rc; + gboolean res = TRUE; if (value_len == -1) value_len = strlen(value); - if (pcre_fullinfo(self->pattern, self->extra, PCRE_INFO_CAPTURECOUNT, &result.num_matches) < 0) - g_assert_not_reached(); - if (result.num_matches > LOGMSG_MAX_MATCHES) - result.num_matches = LOGMSG_MAX_MATCHES; - - gsize matches_size = 3 * (result.num_matches + 1); - result.matches = g_alloca(matches_size * sizeof(gint)); + result.match_data = pcre2_match_data_create_from_pattern(self->pattern, NULL); result.source_value = value; result.source_value_len = value_len; result.source_handle = value_handle; - rc = pcre_exec(self->pattern, self->extra, - result.source_value, result.source_value_len, - 0, self->match_options, - result.matches, matches_size); + rc = pcre2_match(self->pattern, + (PCRE2_SPTR) result.source_value, + (PCRE2_SIZE) result.source_value_len, + (PCRE2_SIZE) 0, + self->match_options, + result.match_data, + NULL); if (rc < 0) { switch (rc) { - case PCRE_ERROR_NOMATCH: + case PCRE2_ERROR_NOMATCH: break; default: @@ -534,22 +530,22 @@ evt_tag_int("error_code", rc)); break; } - return 
FALSE; + res = FALSE; } - if (rc == 0) + else if (rc == 0) { - msg_error("Error while storing matching substrings"); + msg_error("Error while storing matching substrings, more than 256 capture groups encountered"); } else { - result.num_matches = rc; if ((s->flags & LMF_STORE_MATCHES)) { log_matcher_pcre_re_feed_backrefs(self, msg, &result); log_matcher_pcre_re_feed_named_substrings(self, msg, &result); } } - return TRUE; + pcre2_match_data_free(result.match_data); + return res; } static gchar * @@ -559,24 +555,19 @@ LogMatcherPcreRe *self = (LogMatcherPcreRe *) s; LogMatcherPcreMatchResult result; GString *new_value = NULL; - gsize matches_size; gint rc; gint start_offset, last_offset; gint options; gboolean last_match_was_empty; - if (pcre_fullinfo(self->pattern, self->extra, PCRE_INFO_CAPTURECOUNT, &result.num_matches) < 0) - g_assert_not_reached(); - if (result.num_matches > LOGMSG_MAX_MATCHES) - result.num_matches = LOGMSG_MAX_MATCHES; + result.match_data = pcre2_match_data_create_from_pattern(self->pattern, NULL); + PCRE2_SIZE *matches = pcre2_get_ovector_pointer(result.match_data); - matches_size = 3 * (result.num_matches + 1); - result.matches = g_alloca(matches_size * sizeof(gint)); /* we need zero initialized offsets for the last match as the * algorithm tries uses that as the base position */ - result.matches[0] = result.matches[1] = result.matches[2] = 0; + matches[0] = matches[1] = 0; if (value_len == -1) value_len = strlen(value); @@ -596,7 +587,7 @@ * advanced). * * A zero-length match can be as simple as "a*" which will be - * returned unless PCRE_NOTEMPTY is specified. + * returned unless PCRE2_NOTEMPTY is specified. * * By supporting zero-length matches, we basically make it * possible to insert replacement between each incoming @@ -617,17 +608,21 @@ * to see if a non-empty match can be found. 
*/ - options = PCRE_NOTEMPTY | PCRE_ANCHORED; + options = PCRE2_NOTEMPTY | PCRE2_ANCHORED; } else { options = 0; } - rc = pcre_exec(self->pattern, self->extra, - result.source_value, result.source_value_len, - start_offset, (self->match_options | options), result.matches, matches_size); - if (rc < 0 && rc != PCRE_ERROR_NOMATCH) + rc = pcre2_match(self->pattern, + (PCRE2_SPTR) result.source_value, + (PCRE2_SIZE) result.source_value_len, + start_offset, + (self->match_options | options), + result.match_data, + NULL); + if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) { msg_error("Error while matching regexp", evt_tag_int("error_code", rc)); @@ -635,7 +630,7 @@ } else if (rc < 0) { - if ((options & PCRE_NOTEMPTY) == 0) + if ((options & PCRE2_NOTEMPTY) == 0) { /* we didn't match, even when we permitted to match the * empty string. Nothing to find here, bail out */ @@ -651,31 +646,31 @@ last_match_was_empty = FALSE; continue; } + else if (rc == 0) + { + msg_error("Error while storing matching substrings, more than 256 capture groups encountered"); + break; + } else { - /* if the output array was too small, truncate the number of - captures to LOGMSG_MAX_MATCHES */ - - if (rc == 0) - rc = matches_size / 3; - - result.num_matches = rc; log_matcher_pcre_re_feed_backrefs(self, msg, &result); log_matcher_pcre_re_feed_named_substrings(self, msg, &result); if (!new_value) new_value = g_string_sized_new(result.source_value_len); /* append non-matching portion */ - g_string_append_len(new_value, &result.source_value[last_offset], result.matches[0] - last_offset); + g_string_append_len(new_value, &result.source_value[last_offset], matches[0] - last_offset); /* replacement */ log_template_append_format(replacement, msg, &DEFAULT_TEMPLATE_EVAL_OPTIONS, new_value); - last_match_was_empty = (result.matches[0] == result.matches[1]); - start_offset = last_offset = result.matches[1]; + last_match_was_empty = (matches[0] == matches[1]); + start_offset = last_offset = matches[1]; } } while 
(self->super.flags & LMF_GLOBAL && start_offset < result.source_value_len); + pcre2_match_data_free(result.match_data); + if (new_value) { /* append the last literal */ @@ -691,8 +686,7 @@ log_matcher_pcre_re_free(LogMatcher *s) { LogMatcherPcreRe *self = (LogMatcherPcreRe *) s; - pcre_free_study(self->extra); - pcre_free(self->pattern); + pcre2_code_free(self->pattern); log_matcher_free_method(s); } diff -ur syslog-ng-syslog-ng-4.2.0.orig/lib/multi-line/multi-line-pattern.c syslog-ng-syslog-ng-4.2.0/lib/multi-line/multi-line-pattern.c --- syslog-ng-syslog-ng-4.2.0.orig/lib/multi-line/multi-line-pattern.c 2023-05-10 16:55:07.000000000 +0200 +++ syslog-ng-syslog-ng-4.2.0/lib/multi-line/multi-line-pattern.c 2023-07-19 02:12:21.376815531 +0200 @@ -22,69 +22,105 @@ * COPYING for details. */ #include "multi-line/multi-line-pattern.h" +#include "messages.h" MultiLinePattern * multi_line_pattern_compile(const gchar *regexp, GError **error) { MultiLinePattern *self = g_new0(MultiLinePattern, 1); - gint optflags = 0; gint rc; - const gchar *errptr; - gint erroffset; + PCRE2_SIZE erroffset; g_return_val_if_fail(error == NULL || *error == NULL, FALSE); self->ref_cnt = 1; /* compile the regexp */ - self->pattern = pcre_compile2(regexp, 0, &rc, &errptr, &erroffset, NULL); + self->pattern = pcre2_compile((PCRE2_SPTR) regexp, PCRE2_ZERO_TERMINATED, 0, &rc, &erroffset, NULL); if (!self->pattern) { - g_set_error(error, 0, 0, "Error while compiling multi-line regexp as a PCRE expression, error=%s, error_at=%d", errptr, - erroffset); + PCRE2_UCHAR error_message[128]; + + pcre2_get_error_message(rc, error_message, sizeof(error_message)); + g_set_error(error, 0, 0, + "Error while compiling multi-line regexp as a PCRE expression, error=%s, error_at=%" G_GSIZE_FORMAT, + (gchar *) error_message, erroffset); goto error; } -#ifdef PCRE_STUDY_JIT_COMPILE - optflags = PCRE_STUDY_JIT_COMPILE; -#endif - /* optimize regexp */ - self->extra = pcre_study(self->pattern, optflags, &errptr); - if 
(errptr != NULL) + rc = pcre2_jit_compile(self->pattern, PCRE2_JIT_COMPLETE); + if (rc < 0) { - g_set_error(error, 0, 0, "Error while studying multi-line regexp, error=%s", errptr); - goto error; + PCRE2_UCHAR error_message[128]; + + pcre2_get_error_message(rc, error_message, sizeof(error_message)); + msg_warning("multi-line-pattern: Error while JIT compiling regular expression", + evt_tag_str("regexp", regexp), + evt_tag_str("error", (gchar *) error_message)); } return self; error: if (self->pattern) - pcre_free(self->pattern); + pcre2_code_free(self->pattern); g_free(self); return NULL; } gint -multi_line_pattern_find(MultiLinePattern *re, const guchar *str, gsize len, gint *matches, gint matches_num) +multi_line_pattern_eval(MultiLinePattern *re, const guchar *str, gsize len, pcre2_match_data *match_data) { - gint rc; + return pcre2_match(re->pattern, (PCRE2_SPTR) str, (PCRE2_SIZE) len, 0, 0, match_data, NULL); +} +gboolean +multi_line_pattern_find(MultiLinePattern *re, const guchar *str, gsize len, gint *start, gint *end) +{ if (!re) - return -1; + return FALSE; + + gboolean result = FALSE; + pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(re->pattern, NULL); + - rc = pcre_exec(re->pattern, re->extra, (const gchar *) str, len, 0, 0, matches, matches_num * 3); - return rc; + if (multi_line_pattern_eval(re, str, len, match_data) < 0) + goto exit; + + guint32 num_matches = pcre2_get_ovector_count(match_data); + PCRE2_SIZE *matches = pcre2_get_ovector_pointer(match_data); + + if (num_matches == 0) + goto exit; + + *start = matches[0]; + *end = matches[1]; + result = TRUE; +exit: + pcre2_match_data_free(match_data); + return result; } gboolean multi_line_pattern_match(MultiLinePattern *re, const guchar *str, gsize len) { - gint match[3]; - if (multi_line_pattern_find(re, str, len, match, 1) < 0) + if (!re) return FALSE; - return match[0] >= 0; -} + gboolean result = FALSE; + pcre2_match_data *match_data = 
pcre2_match_data_create_from_pattern(re->pattern, NULL); + + if (multi_line_pattern_eval(re, str, len, match_data) < 0) + goto exit; + + guint32 num_matches = pcre2_get_ovector_count(match_data); + PCRE2_SIZE *matches = pcre2_get_ovector_pointer(match_data); + + result = num_matches > 0 && matches[0] >= 0; + +exit: + pcre2_match_data_free(match_data); + return result; +} MultiLinePattern * multi_line_pattern_ref(MultiLinePattern *self) @@ -100,9 +136,7 @@ if (self && (--self->ref_cnt == 0)) { if (self->pattern) - pcre_free(self->pattern); - if (self->extra) - pcre_free_study(self->extra); + pcre2_code_free(self->pattern); g_free(self); } } diff -ur syslog-ng-syslog-ng-4.2.0.orig/lib/multi-line/multi-line-pattern.h syslog-ng-syslog-ng-4.2.0/lib/multi-line/multi-line-pattern.h --- syslog-ng-syslog-ng-4.2.0.orig/lib/multi-line/multi-line-pattern.h 2023-05-10 16:55:07.000000000 +0200 +++ syslog-ng-syslog-ng-4.2.0/lib/multi-line/multi-line-pattern.h 2023-07-19 02:12:21.376815531 +0200 @@ -33,11 +33,10 @@ struct _MultiLinePattern { gint ref_cnt; - pcre *pattern; - pcre_extra *extra; + pcre2_code *pattern; }; -gint multi_line_pattern_find(MultiLinePattern *re, const guchar *str, gsize len, gint *matches, gint matches_num); +gboolean multi_line_pattern_find(MultiLinePattern *re, const guchar *str, gsize len, gint *start, gint *end); gboolean multi_line_pattern_match(MultiLinePattern *re, const guchar *str, gsize len); MultiLinePattern *multi_line_pattern_compile(const gchar *regexp, GError **error); MultiLinePattern *multi_line_pattern_ref(MultiLinePattern *self); diff -ur syslog-ng-syslog-ng-4.2.0.orig/lib/multi-line/regexp-multi-line.c syslog-ng-syslog-ng-4.2.0/lib/multi-line/regexp-multi-line.c --- syslog-ng-syslog-ng-4.2.0.orig/lib/multi-line/regexp-multi-line.c 2023-05-10 16:55:07.000000000 +0200 +++ syslog-ng-syslog-ng-4.2.0/lib/multi-line/regexp-multi-line.c 2023-07-19 02:12:21.377815546 +0200 @@ -27,19 +27,21 @@ static gint 
_prefix_garbage_get_offset_of_garbage(RegexpMultiLine *self, const guchar *line, gsize line_len) { - gint match[3]; - if (multi_line_pattern_find(self->garbage, line, line_len, match, 1) < 0) + gint start, end; + + if (!multi_line_pattern_find(self->garbage, line, line_len, &start, &end)) return -1; - return match[0]; + return start; } static gint _prefix_suffix_get_offset_of_garbage(RegexpMultiLine *self, const guchar *line, gsize line_len) { - gint match[3]; - if (multi_line_pattern_find(self->garbage, line, line_len, match, 1) < 0) + gint start, end; + + if (!multi_line_pattern_find(self->garbage, line, line_len, &start, &end)) return -1; - return match[1]; + return end; } static gint diff -ur syslog-ng-syslog-ng-4.2.0.orig/modules/basicfuncs/list-funcs.c syslog-ng-syslog-ng-4.2.0/modules/basicfuncs/list-funcs.c --- syslog-ng-syslog-ng-4.2.0.orig/modules/basicfuncs/list-funcs.c 2023-05-10 16:55:07.000000000 +0200 +++ syslog-ng-syslog-ng-4.2.0/modules/basicfuncs/list-funcs.c 2023-07-19 02:12:21.377815546 +0200 @@ -354,8 +354,7 @@ StringMatchMode mode; gchar *pattern; GPatternSpec *glob; - pcre *pcre; - pcre_extra *pcre_extra; + pcre2_code *pcre; } StringMatcher; static gboolean @@ -369,33 +368,35 @@ static gboolean string_matcher_prepare_pcre(StringMatcher *self) { - const gchar *errptr; - gint erroffset; + PCRE2_SIZE erroffset; gint rc; - self->pcre = pcre_compile2(self->pattern, PCRE_ANCHORED, &rc, &errptr, &erroffset, NULL); + self->pcre = pcre2_compile((PCRE2_SPTR) self->pattern, PCRE2_ZERO_TERMINATED, PCRE2_ANCHORED, &rc, &erroffset, NULL); if (!self->pcre) { + PCRE2_UCHAR error_message[128]; + pcre2_get_error_message(rc, error_message, sizeof(error_message)); + msg_error("Error while compiling regular expression", evt_tag_str("regular_expression", self->pattern), evt_tag_str("error_at", &self->pattern[erroffset]), evt_tag_int("error_offset", erroffset), - evt_tag_str("error_message", errptr), + evt_tag_str("error_message", (gchar *) error_message), 
evt_tag_int("error_code", rc)); return FALSE; } - self->pcre_extra = pcre_study(self->pcre, PCRE_STUDY_JIT_COMPILE, &errptr); - if (errptr) + + /* optimize regexp */ + rc = pcre2_jit_compile(self->pcre, PCRE2_JIT_COMPLETE); + if (rc < 0) { - msg_error("Error while optimizing regular expression", - evt_tag_str("regular_expression", self->pattern), - evt_tag_str("error_message", errptr)); - pcre_free(self->pcre); - if (self->pcre_extra) - pcre_free_study(self->pcre_extra); - return FALSE; - } + PCRE2_UCHAR error_message[128]; + pcre2_get_error_message(rc, error_message, sizeof(error_message)); + msg_warning("$(list-search): Failed to JIT compile regular expression", + evt_tag_str("regexp", self->pattern), + evt_tag_str("error", (gchar *) error_message)); + } return TRUE; } @@ -416,8 +417,11 @@ static gboolean string_matcher_match_pcre(StringMatcher *self, const char *string, gsize string_len) { - gint rc = pcre_exec(self->pcre, self->pcre_extra, string, string_len, 0, 0, NULL, 0); - if (rc == PCRE_ERROR_NOMATCH) + pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(self->pcre, NULL); + gint rc = pcre2_match(self->pcre, (PCRE2_SPTR) string, (PCRE2_SIZE) string_len, 0, 0, match_data, NULL); + pcre2_match_data_free(match_data); + + if (rc == PCRE2_ERROR_NOMATCH) { return FALSE; } @@ -468,9 +472,7 @@ if (self->glob) g_pattern_spec_free(self->glob); if (self->pcre) - pcre_free(self->pcre); - if (self->pcre_extra) - pcre_free_study(self->pcre_extra); + pcre2_code_free(self->pcre); g_free(self); } diff -ur syslog-ng-syslog-ng-4.2.0.orig/modules/basicfuncs/tests/test_basicfuncs.c syslog-ng-syslog-ng-4.2.0/modules/basicfuncs/tests/test_basicfuncs.c --- syslog-ng-syslog-ng-4.2.0.orig/modules/basicfuncs/tests/test_basicfuncs.c 2023-05-10 16:55:07.000000000 +0200 +++ syslog-ng-syslog-ng-4.2.0/modules/basicfuncs/tests/test_basicfuncs.c 2023-07-19 02:12:21.377815546 +0200 @@ -663,3 +663,8 @@ { assert_template_format(param->template, param->expected); } + 
+Test(basicfuncs, test_performance) +{ + perftest_template("$(list-search --start-index 1 --mode pcre .az '\"foo,\",\"bar\",\"baz\"')"); +} diff -ur syslog-ng-syslog-ng-4.2.0.orig/modules/correlation/group-lines.c syslog-ng-syslog-ng-4.2.0/modules/correlation/group-lines.c --- syslog-ng-syslog-ng-4.2.0.orig/modules/correlation/group-lines.c 2023-05-10 16:55:07.000000000 +0200 +++ syslog-ng-syslog-ng-4.2.0/modules/correlation/group-lines.c 2023-07-19 02:12:21.377815546 +0200 @@ -28,7 +28,6 @@ #include "messages.h" #include "grouping-parser.h" -#include #include typedef struct _GroupLinesContext diff -ur syslog-ng-syslog-ng-4.2.0.orig/modules/correlation/radix.c syslog-ng-syslog-ng-4.2.0/modules/correlation/radix.c --- syslog-ng-syslog-ng-4.2.0.orig/modules/correlation/radix.c 2023-05-10 16:55:07.000000000 +0200 +++ syslog-ng-syslog-ng-4.2.0/modules/correlation/radix.c 2023-07-19 02:12:21.377815546 +0200 @@ -22,13 +22,12 @@ */ #include "radix.h" +#include "compat/pcre.h" #include #include #include -#include - /************************************************************** * Parsing nodes. 
**************************************************************/ @@ -129,80 +128,80 @@ typedef struct _RParserPCREState { - pcre *re; - pcre_extra *extra; + pcre2_code *re; } RParserPCREState; gboolean r_parser_pcre(gchar *str, gint *len, const gchar *param, gpointer state, RParserMatch *match) { RParserPCREState *self = (RParserPCREState *) state; + gboolean result = FALSE; gint rc; - gint num_matches; - - if (pcre_fullinfo(self->re, self->extra, PCRE_INFO_CAPTURECOUNT, &num_matches) < 0) - g_assert_not_reached(); - if (num_matches > LOGMSG_MAX_MATCHES) - num_matches = LOGMSG_MAX_MATCHES; - gsize matches_size = 3 * (num_matches + 1); - gint *matches = g_alloca(matches_size * sizeof(gint)); + pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(self->re, NULL); + rc = pcre2_match(self->re, (PCRE2_SPTR) str, (PCRE2_SIZE) strlen(str), 0, 0, match_data, NULL); - rc = pcre_exec(self->re, self->extra, str, strlen(str), 0, 0, matches, matches_size); - - if (rc == PCRE_ERROR_NOMATCH) - { - return FALSE; - } + if (rc == PCRE2_ERROR_NOMATCH) + goto exit; if (rc < 0) { msg_error("Error while matching regexp", evt_tag_int("error_code", rc)); - return FALSE; + goto exit; } if (rc == 0) { msg_error("Error while storing matching substrings"); - return FALSE; + goto exit; } + PCRE2_SIZE *matches = pcre2_get_ovector_pointer(match_data); + *len = matches[1] - matches[0]; - return TRUE; + result = TRUE; +exit: + pcre2_match_data_free(match_data); + return result; } gpointer r_parser_pcre_compile_state(const gchar *expr) { RParserPCREState *self = g_new0(RParserPCREState, 1); - const gchar *errptr; - gint erroffset; + gsize erroffset; gint rc; - self->re = pcre_compile2(expr, PCRE_ANCHORED, &rc, &errptr, &erroffset, NULL); + self->re = pcre2_compile((PCRE2_SPTR)expr, PCRE2_ZERO_TERMINATED, PCRE2_ANCHORED, &rc, &erroffset, NULL); if (!self->re) { + PCRE2_UCHAR error_message[128]; + + pcre2_get_error_message(rc, error_message, sizeof(error_message)); + msg_error("Error 
while compiling regular expression", evt_tag_str("regular_expression", expr), evt_tag_str("error_at", &expr[erroffset]), evt_tag_int("error_offset", erroffset), - evt_tag_str("error_message", errptr), + evt_tag_str("error_message", (gchar *) error_message), evt_tag_int("error_code", rc)); g_free(self); return NULL; } - self->extra = pcre_study(self->re, 0, &errptr); - if (errptr) + + /* optimize regexp */ + rc = pcre2_jit_compile(self->re, PCRE2_JIT_COMPLETE); + if (rc < 0) { - msg_error("Error while optimizing regular expression", - evt_tag_str("regular_expression", expr), - evt_tag_str("error_message", errptr)); - pcre_free(self->re); - if (self->extra) - pcre_free(self->extra); - g_free(self); - return NULL; + PCRE2_UCHAR error_message[128]; + + pcre2_get_error_message(rc, error_message, sizeof(error_message)); + msg_warning("radix: Error while JIT compiling regular expression", + evt_tag_str("regular_expression", expr), + evt_tag_str("error_message", (gchar *) error_message), + evt_tag_int("error_code", rc)); } + return (gpointer) self; } @@ -212,9 +211,7 @@ RParserPCREState *self = (RParserPCREState *) s; if (self->re) - pcre_free(self->re); - if (self->extra) - pcre_free(self->extra); + pcre2_code_free(self->re); g_free(self); } diff -ur syslog-ng-syslog-ng-4.2.0.orig/packaging/debian/control syslog-ng-syslog-ng-4.2.0/packaging/debian/control --- syslog-ng-syslog-ng-4.2.0.orig/packaging/debian/control 2023-05-10 16:55:07.000000000 +0200 +++ syslog-ng-syslog-ng-4.2.0/packaging/debian/control 2023-07-19 02:12:21.377815546 +0200 @@ -21,7 +21,7 @@ libbson-dev, libjson-c-dev | libjson0-dev, libwrap0-dev, - libpcre3-dev, + libpcre2-dev, libcap-dev [linux-any], libsystemd-dev (>= 209) [linux-any], libhiredis-dev, diff -ur syslog-ng-syslog-ng-4.2.0.orig/packaging/rhel/syslog-ng.spec syslog-ng-syslog-ng-4.2.0/packaging/rhel/syslog-ng.spec --- syslog-ng-syslog-ng-4.2.0.orig/packaging/rhel/syslog-ng.spec 2023-05-10 16:55:07.000000000 +0200 +++ 
syslog-ng-syslog-ng-4.2.0/packaging/rhel/syslog-ng.spec 2023-07-19 02:12:21.378815560 +0200 @@ -60,7 +60,7 @@ BuildRequires: libdbi-devel BuildRequires: libnet-devel BuildRequires: openssl-devel -BuildRequires: pcre-devel +BuildRequires: pcre2-devel BuildRequires: libuuid-devel BuildRequires: libesmtp-devel BuildRequires: libcurl-devel