| .\" Copyright (C), 1995, Graeme W. Wilford. (Wilf.) |
| .\" |
| .\" %%%LICENSE_START(VERBATIM) |
| .\" Permission is granted to make and distribute verbatim copies of this |
| .\" manual provided the copyright notice and this permission notice are |
| .\" preserved on all copies. |
| .\" |
| .\" Permission is granted to copy and distribute modified versions of this |
| .\" manual under the conditions for verbatim copying, provided that the |
| .\" entire resulting derived work is distributed under the terms of a |
| .\" permission notice identical to this one. |
| .\" |
| .\" Since the Linux kernel and libraries are constantly changing, this |
| .\" manual page may be incorrect or out-of-date. The author(s) assume no |
| .\" responsibility for errors or omissions, or for damages resulting from |
| .\" the use of the information contained herein. The author(s) may not |
| .\" have taken the same level of care in the production of this manual, |
| .\" which is licensed free of charge, as they might when working |
| .\" professionally. |
| .\" |
| .\" Formatted or processed versions of this manual, if unaccompanied by |
| .\" the source, must acknowledge the copyright and authors of this work. |
| .\" %%%LICENSE_END |
| .\" |
| .\" Wed Jun 14 16:10:28 BST 1995 Wilf. (G.Wilford@ee.surrey.ac.uk) |
| .\" Tiny change in formatting - aeb, 950812 |
| .\" Modified 8 May 1998 by Joseph S. Myers (jsm28@cam.ac.uk) |
| .\" |
| .\" show the synopsis section nicely |
| .TH REGEX 3 2021-03-22 "GNU" "Linux Programmer's Manual" |
| .SH NAME |
| regcomp, regexec, regerror, regfree \- POSIX regex functions |
| .SH SYNOPSIS |
| .nf |
| .B #include <regex.h> |
| .PP |
| .BI "int regcomp(regex_t *restrict " preg ", const char *restrict " regex , |
| .BI " int " cflags ); |
| .BI "int regexec(const regex_t *restrict " preg \ |
| ", const char *restrict " string , |
| .BI " size_t " nmatch ", regmatch_t " pmatch "[restrict]\ |
| , int " eflags ); |
| .PP |
| .BI "size_t regerror(int " errcode ", const regex_t *restrict " preg , |
| .BI " char *restrict " errbuf ", size_t " errbuf_size ); |
| .BI "void regfree(regex_t *" preg ); |
| .fi |
| .SH DESCRIPTION |
| .SS POSIX regex compiling |
| .BR regcomp () |
| is used to compile a regular expression into a form that is suitable |
| for subsequent |
| .BR regexec () |
| searches. |
| .PP |
| .BR regcomp () |
| is supplied with |
| .IR preg , |
| a pointer to a pattern buffer storage area; |
| .IR regex , |
| a pointer to the null-terminated string containing the regular expression; and |
| .IR cflags , |
| flags used to determine the type of compilation. |
| .PP |
| All regular expression searching must be done via a compiled pattern |
| buffer, thus |
| .BR regexec () |
| must always be supplied with the address of a |
| .BR regcomp ()-initialized |
| pattern buffer. |
| .PP |
| .I cflags |
| is the |
| .RB bitwise- or |
| of zero or more of the following: |
| .TP |
| .B REG_EXTENDED |
| Use |
| .B POSIX |
| Extended Regular Expression syntax when interpreting |
| .IR regex . |
| If not set, |
| .B POSIX |
| Basic Regular Expression syntax is used. |
| .TP |
| .B REG_ICASE |
| Do not differentiate case. |
| Subsequent |
| .BR regexec () |
| searches using this pattern buffer will be case insensitive. |
| .TP |
| .B REG_NOSUB |
| Do not report position of matches. |
| The |
| .I nmatch |
| and |
| .I pmatch |
| arguments to |
| .BR regexec () |
| are ignored if the pattern buffer supplied was compiled with this flag set. |
| .TP |
| .B REG_NEWLINE |
| Match-any-character operators don't match a newline. |
| .IP |
| A nonmatching list |
| .RB ( [\(ha...] ) |
| not containing a newline does not match a newline. |
| .IP |
| Match-beginning-of-line operator |
| .RB ( \(ha ) |
| matches the empty string immediately after a newline, regardless of |
| whether |
| .IR eflags , |
| the execution flags of |
| .BR regexec (), |
| contains |
| .BR REG_NOTBOL . |
| .IP |
| Match-end-of-line operator |
| .RB ( $ ) |
| matches the empty string immediately before a newline, regardless of |
| whether |
| .I eflags |
| contains |
| .BR REG_NOTEOL . |
| .SS POSIX regex matching |
| .BR regexec () |
| is used to match a null-terminated string |
| against the precompiled pattern buffer, |
| .IR preg . |
| .I nmatch |
| and |
| .I pmatch |
| are used to provide information regarding the location of any matches. |
| .I eflags |
| is the |
| .RB bitwise- or |
| of zero or more of the following flags: |
| .TP |
| .B REG_NOTBOL |
| The match-beginning-of-line operator always fails to match (but see the |
| compilation flag |
| .B REG_NEWLINE |
| above). |
| This flag may be used when different portions of a string are passed to |
| .BR regexec () |
| and the beginning of the string should not be interpreted as the |
| beginning of the line. |
| .TP |
| .B REG_NOTEOL |
| The match-end-of-line operator always fails to match (but see the |
| compilation flag |
| .B REG_NEWLINE |
| above). |
| .TP |
| .B REG_STARTEND |
| Match only the portion of the input string delimited by |
| .IR pmatch[0] : |
| the search starts at byte |
| .I pmatch[0].rm_so |
| and ends before byte |
| .IR pmatch[0].rm_eo . |
| This allows matching embedded NUL bytes |
| and avoids a |
| .BR strlen (3) |
| on large strings. |
| It does not use |
| .I nmatch |
| on input, and does not change |
| .B REG_NOTBOL |
| or |
| .B REG_NEWLINE |
| processing. |
| This flag is a BSD extension, not present in POSIX. |
| .SS Byte offsets |
| Unless |
| .B REG_NOSUB |
| was set for the compilation of the pattern buffer, it is possible to |
| obtain match addressing information. |
| .I pmatch |
| must be dimensioned to have at least |
| .I nmatch |
| elements. |
| These are filled in by |
| .BR regexec () |
| with substring match addresses. |
| The offsets of the subexpression starting at the |
| .IR i th |
| open parenthesis are stored in |
| .IR pmatch[i] . |
| The entire regular expression's match addresses are stored in |
| .IR pmatch[0] . |
| (Note that to return the offsets of |
| .I N |
| subexpression matches, |
| .I nmatch |
| must be at least |
| .IR N+1 .) |
| Any unused structure elements will contain the value \-1. |
| .PP |
| The |
| .I regmatch_t |
| structure which is the type of |
| .I pmatch |
| is defined in |
| .IR <regex.h> . |
| .PP |
| .in +4n |
| .EX |
| typedef struct { |
| regoff_t rm_so; |
| regoff_t rm_eo; |
| } regmatch_t; |
| .EE |
| .in |
| .PP |
| Each |
| .I rm_so |
| element that is not \-1 indicates the start offset of the next largest |
| substring match within the string. |
| The corresponding |
| .I rm_eo |
| element indicates the end offset of the match, |
| which is the offset of the first character after the matching text. |
| .SS POSIX error reporting |
| .BR regerror () |
| is used to turn the error codes that can be returned by both |
| .BR regcomp () |
| and |
| .BR regexec () |
| into error message strings. |
| .PP |
| .BR regerror () |
| is passed the error code, |
| .IR errcode , |
| the pattern buffer, |
| .IR preg , |
| a pointer to a character string buffer, |
| .IR errbuf , |
| and the size of the string buffer, |
| .IR errbuf_size . |
| It returns the size of the |
| .I errbuf |
| required to contain the null-terminated error message string. |
| If both |
| .I errbuf |
| and |
| .I errbuf_size |
| are nonzero, |
| .I errbuf |
| is filled in with the first |
| .I "errbuf_size \- 1" |
| characters of the error message and a terminating null byte (\(aq\e0\(aq). |
| .SS POSIX pattern buffer freeing |
| Supplying |
| .BR regfree () |
| with a precompiled pattern buffer, |
| .IR preg , |
| will free the memory allocated to the pattern buffer by the compiling |
| process, |
| .BR regcomp (). |
| .SH RETURN VALUE |
| .BR regcomp () |
| returns zero for a successful compilation or an error code for failure. |
| .PP |
| .BR regexec () |
| returns zero for a successful match or |
| .B REG_NOMATCH |
| for failure. |
| .SH ERRORS |
| The following errors can be returned by |
| .BR regcomp (): |
| .TP |
| .B REG_BADBR |
| Invalid use of back reference operator. |
| .TP |
| .B REG_BADPAT |
| Invalid use of pattern operators such as group or list. |
| .TP |
| .B REG_BADRPT |
| Invalid use of repetition operators such as using \(aq*\(aq |
| as the first character. |
| .TP |
| .B REG_EBRACE |
| Unmatched brace interval operators. |
| .TP |
| .B REG_EBRACK |
| Unmatched bracket list operators. |
| .TP |
| .B REG_ECOLLATE |
| Invalid collating element. |
| .TP |
| .B REG_ECTYPE |
| Unknown character class name. |
| .TP |
| .B REG_EEND |
| Nonspecific error. |
| This is not defined by POSIX.2. |
| .TP |
| .B REG_EESCAPE |
| Trailing backslash. |
| .TP |
| .B REG_EPAREN |
| Unmatched parenthesis group operators. |
| .TP |
| .B REG_ERANGE |
| Invalid use of the range operator; for example, the ending point of the range |
| occurs prior to the starting point. |
| .TP |
| .B REG_ESIZE |
| Compiled regular expression requires a pattern buffer larger than 64\ kB. |
| This is not defined by POSIX.2. |
| .TP |
| .B REG_ESPACE |
| The regex routines ran out of memory. |
| .TP |
| .B REG_ESUBREG |
| Invalid back reference to a subexpression. |
| .SH ATTRIBUTES |
| For an explanation of the terms used in this section, see |
| .BR attributes (7). |
| .ad l |
| .nh |
| .TS |
| allbox; |
| lbx lb lb |
| l l l. |
| Interface Attribute Value |
| T{ |
| .BR regcomp (), |
| .BR regexec () |
| T} Thread safety MT-Safe locale |
| T{ |
| .BR regerror () |
| T} Thread safety MT-Safe env |
| T{ |
| .BR regfree () |
| T} Thread safety MT-Safe |
| .TE |
| .hy |
| .ad |
| .sp 1 |
| .SH CONFORMING TO |
| POSIX.1-2001, POSIX.1-2008. |
| .SH EXAMPLES |
| .EX |
| #include <stdint.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <regex.h> |
| |
| #define ARRAY_SIZE(arr) (sizeof((arr)) / sizeof((arr)[0])) |
| |
| static const char *const str = |
| "1) John Driverhacker;\en2) John Doe;\en3) John Foo;\en"; |
| static const char *const re = "John.*o"; |
| |
| int main(void) |
| { |
| static const char *s = str; |
| regex_t regex; |
| regmatch_t pmatch[1]; |
| regoff_t off, len; |
| |
| if (regcomp(&regex, re, REG_NEWLINE)) |
| exit(EXIT_FAILURE); |
| |
| printf("String = \e"%s\e"\en", str); |
| printf("Matches:\en"); |
| |
| for (int i = 0; ; i++) { |
| if (regexec(&regex, s, ARRAY_SIZE(pmatch), pmatch, 0)) |
| break; |
| |
| off = pmatch[0].rm_so + (s \- str); |
| len = pmatch[0].rm_eo \- pmatch[0].rm_so; |
| printf("#%d:\en", i); |
| printf("offset = %jd; length = %jd\en", (intmax_t) off, |
| (intmax_t) len); |
| printf("substring = \e"%.*s\e"\en", len, s + pmatch[0].rm_so); |
| |
| s += pmatch[0].rm_eo; |
| } |
| |
| exit(EXIT_SUCCESS); |
| } |
| .EE |
| .SH SEE ALSO |
| .BR grep (1), |
| .BR regex (7) |
| .PP |
| The glibc manual section, |
| .I "Regular Expressions" |