Friday, 31 October 2014

A Simple Regular Expression (RegExp) Example in Python

import re
# Sample inputs shared by the regular-expression demos in this script.
Str1 = "This is a test string.. 123123123 "
Str2 = "<HTML> THis is a sample string having 1232123, %$%^$%$, ABCDE, asdfhg12312$ </HTML>      "

# re.match only attempts a match at the very start of the string, so r"\w+"
# captures the leading run of word characters in Str1.
matches = re.match(r"\w+", Str1)
print(matches.group() if matches is not None else "No match found")

# Show what kind of object re.match handed back (None when nothing matched).
print(type(matches))
print("\n+++++++++++++++find all the matches using re.finditer--Group method+++++++++++++++\n")
# finditer yields one match object for every non-overlapping run of four or
# more digits found in Str1.
searches = re.compile(r"\d{4,}").finditer(Str1)
for found in searches:
    print(found)                          # the match object itself
    print("Each match: ", found.group())  # only the text that matched

print("\n+++++++++++++++find all the matches using re.finditer method+++++++++++++++\n")
# Scan Str1 again for runs of four or more digits; this time print each match
# object as-is instead of extracting its text with .group().
searches = re.finditer(pattern=r"\d{4,}", string=Str1)
for item in searches:
    print("Item found using FindIter: ", item)


# findall returns the matched substrings themselves (plain str objects), not
# match objects, so calling .group() on an element would raise an error.
findAl = re.findall(r"\w+", Str2)
if findAl:
    print(findAl)
# Iterating an empty list prints nothing, so the loop needs no guard.
for word in findAl:
    print("Match Found: ", word)

print("\n+++++++++++++++find all the matches using re.findall method+++++++++++++++\n")

Str2 = "<HTML> THis is a sample string having 1232123, %$%^$%$, ABCDE, asdfhg12312$ </HTML>      "

# Each pattern collects runs of consecutive upper-case letters A-Z; the
# quantifier sets the minimum run length. A run may be part of a longer word
# (e.g. 'TH' comes from 'THis').
#   {2,} -> two or more letters   -> output: ['HTML', 'TH', 'ABCDE', 'HTML']
#   {3,} -> three or more letters -> output: ['HTML', 'ABCDE', 'HTML']
for upper_run in (r"[A-Z]{2,}", r"[A-Z]{3,}"):
    se1 = re.findall(upper_run, Str2)
    print(se1)


print("++++++++++++++++++++++++++++++++++++++++++++++++++")


Output:
This
<class '_sre.SRE_Match'>

+++++++++++++++find all the matches using re.finditer--Group method+++++++++++++++

<_sre.SRE_Match object; span=(24, 33), match='123123123'>
Each match:  123123123

+++++++++++++++find all the matches using re.finditer method+++++++++++++++

Item found using FindIter:  <_sre.SRE_Match object; span=(24, 33), match='123123123'>
['HTML', 'THis', 'is', 'a', 'sample', 'string', 'having', '1232123', 'ABCDE', 'asdfhg12312', 'HTML']
Match Found:  HTML
Match Found:  THis
Match Found:  is
Match Found:  a
Match Found:  sample
Match Found:  string
Match Found:  having
Match Found:  1232123
Match Found:  ABCDE
Match Found:  asdfhg12312
Match Found:  HTML

+++++++++++++++find all the matches using re.findall method+++++++++++++++

['HTML', 'TH', 'ABCDE', 'HTML']
['HTML', 'ABCDE', 'HTML']
++++++++++++++++++++++++++++++++++++++++++++++++++

No comments:

Post a Comment