# extrakto filter definitions
#
# Define your own filters in ~/.config/extrakto/extrakto.conf
# To override an existing filter, specify only the values you want to change.
# For example, to split words on commas as well, define:
# [word]
# regex: ([^][(){}=$\u2500-\u27BF\uE000-\uF8FF⋅↴│, \t\n\r]+)
#
# Define one section per filter.
# Each filter must have at least a regex containing one or more capture groups.
# regex:   a python regex expression
# enabled: is the filter active (default True)
# in_all:  is the filter included in --all (default True)
# lstrip:  characters to strip from the left of the result
# rstrip:  characters to strip from the right of the result
# exclude: exclude the result if it matches this regex
# alt2-9:  alternate result (see url)
#
# NOTE(review): several values below contain bare '%', '#' and ';' characters.
# This presumably relies on the reader disabling value interpolation and
# inline comments - confirm against the code that loads this file.

[word]
# "words" consist of anything but the following characters:
# [](){}=$
# unicode range 2500-27BF which includes:
# - Box Drawing
# - Block Elements
# - Geometric Shapes
# - Miscellaneous Symbols
# - Dingbats
# unicode range E000-F8FF (private use/Powerline)
# common editor 'whitespace' characters: ⋅↴│
# and whitespace ( \t\n\r)
#
# The two unicode ranges are spelled as \uXXXX escapes rather than as raw
# characters: the private use code points are invisible in most editors and
# are easily lost when the file is copied or re-encoded, which would silently
# turn the '-' between them into a literal hyphen. The regex engine resolves
# the escapes, exactly as it does for \t, \n and \r.
regex: ([^][(){}=$\u2500-\u27BF\uE000-\uF8FF⋅↴│ \t\n\r]+)
# punctuation trimmed from either end of a matched word
lstrip: ,:;()[]{}<>'"|
rstrip: ,:;()[]{}<>'"|.
# words are not part of --all
in_all: False

[path]
# separator: (?=[ \t\n]|"|\(|\[|<|\')?
# optionally starts with: (~|/)?
# note: "+-," inside the first character class is a range from '+' to ',',
# which contains exactly those two characters
regex: (?:[ \t\n\"([<':]|^)(~|/)?([-~a-zA-Z0-9_+-,.]+/[^ \t\n\r|:"'$%&)>\]]*)
# exclude transfer speeds like 5k/s or m/s, and page 1/2
exclude: [kmgKMG]/s$|^\d+/\d+$
# remove invalid end characters (like punctuation or markdown syntax)
rstrip: ",):"

[url]
# group 1 is the scheme prefix, group 2 the remainder of the url
regex: (https?://|git@|git://|ssh://|s*ftp://|file:///)([a-zA-Z0-9?=%/_.:,;~@!#$&()*+-]*)
# alternate result: the text after "://" up to the first '/', '?' or space
alt2: ://([^/? ]+)
# remove invalid end characters (like punctuation or markdown syntax)
rstrip: ",):"

[quote]
# double-quoted text on a single line, including the quotes
regex: ("[^"\n\r]+")
# alternate result: the same text without the surrounding quotes
alt2: "([^"\n\r]+)"

[s-quote]
# single-quoted text on a single line, including the quotes
regex: ('[^'\n\r]+')
# alternate result: the same text without the surrounding quotes
alt2: '([^'\n\r]+)'