Python script to check for sensitive keywords in an article

1. Purpose

In this post, I will demonstrate how to check recently modified files for sensitive keywords using Python.

2. Solution

Here is the example script:


sensitive_words="bad_keyword1|bad_keyword2|bad_keyword3" # regex alternation pattern: matches any one of the |-separated keywords

def check_last_modified_files(base_dir=None, hours=12):
    """Return the files under *base_dir* modified within the last *hours* hours.

    Args:
        base_dir: directory to scan recursively; defaults to the module-level
            FILE_HOME when not given (keeps the original no-arg call working).
        hours: recency window in hours (default 12, as in the original).

    Returns:
        A list of paths (files and directories) whose mtime falls inside
        the window.
    """
    if base_dir is None:
        base_dir = FILE_HOME  # module-level default scan root
    # recursively list everything under the directory
    paths = glob.glob(base_dir + "/**", recursive=True)
    modified_files = []
    current_time = time.time()

    for the_file in paths:
        # age of the entry, in hours
        age_hours = (current_time - os.path.getmtime(the_file)) / (60 * 60)
        if age_hours < hours:
            # bug fix: the original loop had an empty if-body and never
            # collected anything, so it always returned an empty list
            modified_files.append(the_file)
    return modified_files

def is_sensitive():
    """Scan recently modified files for sensitive keywords.

    Returns:
        True as soon as any line of any recently changed file matches the
        module-level `sensitive_words` pattern (case-insensitive);
        False when no file matches.
    """
    modified_files = check_last_modified_files()
    print("got " + str(len(modified_files)) + " changed files")
    for mfile in modified_files:
        # the glob also returns directories; only regular files are scanned
        if not os.path.isfile(mfile):
            continue
        print("checking " + mfile + "...")
        try:
            # bug fix: the original leaked the file handle (no close) and had
            # broken indentation; `with` guarantees the handle is closed
            with open(mfile, 'r') as fh:
                for line_count, line in enumerate(fh, start=1):
                    # case-insensitive match against the keyword pattern
                    if re.search(sensitive_words, line, re.IGNORECASE):
                        print("\nfind sensitive line:\n" + line + "\nin file:" + mfile + ":" + str(line_count))
                        return True  # stop at the first sensitive hit
        except Exception:
            # bug fix: the original called sys.exc_info() outside any
            # try/except; report unreadable files (e.g. binary) and continue
            print('unexpected error %s' % mfile, sys.exc_info())
    return False

You can find the full example code here: https://github.com/bswen/bswen-python-project/tree/main/utils

3. Summary

In this post, I demonstrated how to check files for sensitive keywords in Python by matching file content against a regex pattern. That's it — thank you for reading.