##Regular expressions are specially encoded text strings used as patterns for matching sets of strings
##Sets for regular expressions
# [:alnum:] Alphanumeric characters: [:alpha:] and [:digit:].

# [:alpha:] Alphabetic characters: [:lower:] and [:upper:].

# [:blank:] Blank characters: space and tab, and possibly other locale-dependent characters such as non-breaking space.

# [:digit:] Digits: 0 1 2 3 4 5 6 7 8 9.

# [:lower:] Lower-case letters in the current locale.

# [:upper:] Upper-case letters in the current locale.

# [:punct:] Punctuation characters: ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~.

##Rules for regular expressions

# ? The preceding item is optional and will be matched at most once.

# * The preceding item will be matched zero or more times.

# + The preceding item will be matched one or more times.

# {n} The preceding item is matched exactly n times.

# {n,} The preceding item is matched n or more times.

# {n,m} The preceding item is matched at least n times, but not more than m times.

library(nycflights13)
library(tidyverse)
# Rows of flights with month 12 and day 25; the outer parentheses print the
# assigned result.
(dec25 <- filter(flights, month == 12, day == 25))
flights

# `*` allows zero digits after "dep_", so every column whose name contains
# "dep_" is selected.
select(flights, matches("dep_[[:digit:]]*"))
# `+` requires at least one digit after "dep_"; no flights column name has
# one, so no column is selected.
select(flights, matches("dep_[[:digit:]]+"))
# At least one letter is required on each side of "_". For flights this selects
# the same columns as matches("_") below, because every "_" in its column
# names sits between letters.
select(flights, matches("[[:alpha:]]+_[[:alpha:]]+"))
select(flights, matches("_"))


# Add a column whose name ends in "_" so the two patterns give different
# results.
tmp_flights <- mutate(flights, tmp_ = dep_time)
# "tmp_" is not selected here: no letter follows its "_".
select(tmp_flights, matches("[[:alpha:]]+_[[:alpha:]]+"))
# "tmp_" is selected here: any column name containing "_" matches.
select(tmp_flights, matches("_"))


# Column names containing two consecutive "r" characters.
flights %>% select(matches("[r]{2}"))
# Column names containing "_", then zero or more letters, then "rr".
flights %>% select(matches("_[[:alpha:]]*[r]{2}"))
# Column names containing "time".
flights %>% select(matches("time"))
# Column names containing "a_time".
flights %>% select(matches("a_time"))
# Column names containing "a", then at least one letter, then "_time".
flights %>% select(matches("a[[:alpha:]]+_time"))
# Same pattern, but the "a" must be the first character of the column name.
flights %>% select(matches("^a[[:alpha:]]+_time"))
flights


# Sample strings for base-R grep(), which returns the positions of the
# elements that match the pattern.
txt <- c("arm", "foot foot", "lefroo", "bafoobar", "a")

# Positions of elements containing an "a" followed by at least one letter.
grep(pattern = "a[[:alpha:]]+", x = txt)
# Positions of elements that start with "a" followed by at least one letter.
grep(pattern = "^a[[:alpha:]]+", x = txt)

# Positions of elements containing a blank (space or tab).
grep(pattern = "[[:blank:]]", x = txt)