#!/usr/bin/crm
#
#     a whitelist / blacklist / command based mail sorter
#
#     Designed to be used with procmail or to be used as a filter
#     with a MTA.  Inserts a header into the message that can be
#     used with MUA.
#
#     Overall flow:
#       1. log the raw mail, load the configuration
#       2. if the mail carries a valid "command <password> ..." line,
#          execute the command (whitelist / blacklist / nonspam / spam /
#          learn) and exit
#       3. otherwise test the mail against the priority list, the
#          whitelist, the blacklist, and finally the CSS classifier
#       4. tag the mail with an X-CRM114-Status: header, emit it,
#          optionally auto-train, and exit with :our_exit_code:
#
#     Variables read here but not set here (:spw:, :lcr:,
#     :do_normalize:, :mime_decoder:, :rejected_mail_exit_code:,
#     :automatic_training:, :autotrain_address:) are presumably defined
#     by mailfilterconfig.crm -- confirm against that file.
#
##############################################################
#    for testing purposes, log all incoming mail
output [allmail.txt] /:*:_dw:/
#
#
#       --- include the configuration file ---
insert mailfilterconfig.crm
#
#       --- make a safe space to keep the results of our work ---
#
isolate (:classifier_reason:)
alter (:classifier_reason:) /no reason yet/
#
isolate (:our_exit_code:)
alter (:our_exit_code:) /0/
#
#
isolate (:m_text:)
alter (:m_text:) //
#
#      -------------check for the COMMAND WORD ----------
#
{
	#
	#    grab the command word as :c:, password as :pw:, and any arg(s) as :a:
	#
	match (:z: :pw: :c: ) [:_dw:] /^command ([[:graph:]]+) (.*)/
	#
	#     check the password.  If it's invalid, FAIL out of this bracket set
	#     and just treat this as ordinary (non-command) mail.
	#
	#     NOTE(review): this match is unanchored, so any password that
	#     merely contains a match for :spw: is accepted -- confirm
	#     whether /^...$/ anchoring is wanted here.
	match [:pw:] /:*:spw:/
	{
		#     was it a command to add something to the whitelist?
		#     <nomultiline> keeps (.*) on the command line itself, so only
		#     the pattern -- not the rest of the message -- goes into the list.
		match <nomultiline> (:q: :a:) [:c:] /whitelist (.*)/
		output [whitelist.mfp] /:*:a::*:_nl:/
		alter (:z:) /*** :*:z: *** :*:_nl:Whitelist command executed! :*:_nl:/
		accept
		exit /0/
	}
	{
		#     was it a command to add something to the blacklist?
		#     (same single-line capture as the whitelist command)
		match <nomultiline> (:q: :a:) [:c:] /blacklist (.*)/
		output [blacklist.mfp] /:*:a::*:_nl:/
		alter (:z:) /*** :*:z: *** :*:_nl:Blacklist command executed! :*:_nl:/
		accept
		exit /0/
	}
	{
		#     was it a command to learn something as nonspam?
		match [:c:] /nonspam/
		#     :text: is everything after the command word, to end of mail
		match (:z: :text:) [:_dw:] /:*:_nl:command [[:graph:]]+ nonspam(.*)/
		#      and learn it as nonspam
		output [nonspamtext.txt] /:*:text:/
		learn (nonspam.css) [:text:] /:*:lcr:/
		alter (:z:) /:*:_nl:*** Learn nonspam executed! *** :*:_nl: :*:z:/
		accept
		exit /0/
	}
	{
		#     was it a command to learn something as spam?
		#     (the nonspam case was handled, and exited, just above)
		match [:c:] /spam/
		match (:z: :text:) [:_dw:] /:*:_nl:command [[:graph:]]+ spam(.*)/
		#      and learn it as spam
		output [spamtext.txt] /:*:text:/
		learn (spam.css) [:text:] /:*:lcr:/
		alter (:z:) /:*:_nl:*** Learn spam executed! *** :*:_nl: :*:z:/
		accept
		exit /0/
	}
	{
		#     was it a command to learn something as an arbitrary type?
		#     Note: the files this generates don't get used for anything yet.
		#
		match [:c:] /learn/
		match (:z: :file: :text:) [:_dw:] /:*:_nl:command [[:graph:]]+ learn ([[:graph:]]+)(.*)/
		#      and learn it
		output [:*:file:.txt] /:*:text:/
		learn (:*:file:.css) [:text:] /:*:lcr:/
		alter (:z:) /:*:_nl:*** Learn to class :*:file: executed! ***:*:_nl: :*:z:/
		accept
		exit /0/
	}
}
#     none of the above - classify this incoming mail instead.
#     first according to priority action list,
#     then according to whitelist,
#     then according to blacklist,
#     then according to the CRM sparse spectral classifier.
#
#     check it against the priority action list - this list is
#     of the form of a + or -, then a pattern.  + means accept,
#     - means reject.  These are executed in order (which is
#     different from whitelist or blacklist in that they occur
#     in the order given, not whitelist-then-blacklist).  The priority
#     action list is tried before whitelist or blacklist.
#
isolate (:priolist:)
input (:priolist:) [priolist.mfp]
#     reset matching on :priolist: to the start of the string
match [:priolist:] //
#
#
{
	#... Grab the next line of the list: :pm: is the leading + or -,
	#    :pr: is the rest of the line, used as a regex.
	#    <fromend> resumes after the previous match so each pass of the
	#    loop advances one line; <nomultiline> keeps (.+) from running
	#    across line ends.  When no line is left this match fails and
	#    the loop is done.
	match <fromend nomultiline> (:w: :pm: :pr:) [:priolist:] /(.)(.+)/
	#... see if this regex matches the incoming mail
	{
		match (:reason:) /:*:pr:/
		#   Yep, it matched... branch based on pm
		#
		{
			match [:pm:] /[+]/
			# put in a little tag saying why prio-listed
			alter (:classifier_reason:) /ACCEPT: CRM114 Priority Whitelisted by: :*:reason: **:*:_nl:/
			output [accepted_by_whitelist.txt] /X-CRM114-Status: :*:classifier_reason::*:_dw:/
			goto /:looks_good:/
		}
		#   No, we didn't have a +, so it was a - and we reject.
		alter (:classifier_reason:) /REJECT: CRM114 Priority Blacklisted by: :*:reason: **:*:_nl:/
		output [rejected_by_blacklist.txt] /X-CRM114-Status: :*:classifier_reason::*:_dw:/
		goto /:looks_bad:/
		#
		#
	}
	#  Nope, didn't match as a priority... grab the next regex
	liaf
}
#
#
#     check it against the whitelist... load the whitelist...
isolate (:whitelist:)
input (:whitelist:) [whitelist.mfp]
#     reset matching on whitelist to start of string
match [:whitelist:] //
#
#
{
	#... Grab the next one-per-line pattern and use it as a regex
	#    (<fromend nomultiline>: advance one line per pass, as in the
	#    priority list loop)
	match <fromend nomultiline> (:waste: :whregex:) [:whitelist:] /(.+)/
	#... see if this regex matches the incoming mail
	{
		match (:reason:) /:*:whregex:/
		#   Yep, it matched... whitelist this email
		#
		#   put in a little tag saying why whitelisted:
		alter (:classifier_reason:) /ACCEPT: CRM114 Whitelisted by: :*:reason: **:*:_nl:/
		output [accepted_by_whitelist.txt] /X-CRM114-Status: :*:classifier_reason::*:_dw:/
		goto /:looks_good:/
	}
	#  Nope, didn't match... grab the next regex and try again,
	liaf
}
#
#     No joy, maybe we should blacklist it.
#
#     check it against the blacklist
isolate (:blacklist:)
input (:blacklist:) [blacklist.mfp]
#     reset matching on blacklist to start of string
match [:blacklist:] //
#
{
	#... Grab the next one-per-line pattern and use it as a regex
	#    (<fromend nomultiline>: advance one line per pass, as in the
	#    priority list loop)
	match <fromend nomultiline> (:waste: :blregex:) [:blacklist:] /(.+)/
	#... see if this regex matches the incoming mail
	{
		match (:reason:) /:*:blregex:/
		#   Yep, it matched... blacklist this email
		#
		#   put in a little tag saying why blacklisted
		alter (:classifier_reason:) /REJECT: CRM114 Blacklisted by: :*:reason: ** :*:_nl:/
		output [rejected_by_blacklist.txt] /X-CRM114-Status: :*:classifier_reason::*:_dw:/
		goto /:looks_bad:/
	}
	#  Nope, didn't match... grab the next regex and try again
	liaf
}
#
#
#     OK, it wasn't a command, wasn't whitelisted, wasn't blacklisted,
#     so it's time to pull out the learning heuristics and let 'er rip.
{
	#    m_text is "mutilated text" - the result of all our
	#    machinations and hackages.  It's a _copy_ of the incoming
	#    text, for mutilation
	alter (:m_text:) /:*:_dw:/
	#    expansion 1: - do we perform mime expansions?
	{
		match [:do_normalize:] /yes/
		#
		#    Use external program to decode mime, remove most
		#    headers, and change the charset to UTF-8
		#
		isolate (:exp_text:)
		syscall (:*:m_text:) (:exp_text:) /:*:mime_decoder:/
		alter (:m_text:) /:*:exp_text:/
	}
	#
	#    all done
	#    Run the CSS classifier against the "expanded" text -
	#    if it classifies as SPAM
	#    then output into "rejected.txt".
	#
	isolate (:stats:)
	{
		#    classify fails out of this bracket set when the text does
		#    not land in the first (nonspam) class; the REJECT code
		#    below then runs.
		classify ( nonspam.css | spam.css ) ( :stats: ) [:m_text:] /:*:lcr:/
		alter (:classifier_reason:) /ACCEPT: CRM114 PASS SBPH\/BCR TEST** :*:_nl::*:stats:/
		output [accepted_by_css.txt] /:*:_dw::*:_nl::*:classifier_reason::*:_nl: - armor-pierced and decommented text was -:*:_nl::*:m_text::*:_nl:-0-0-0-0-0-0-0-0-0-0-0-0-0-:*:_nl::*:_nl:/
		goto /:looks_good:/
	}
	alter (:classifier_reason:) /REJECT: CRM114 FAIL SBPH\/BCR TEST** :*:_nl::*:stats:/
	output [rejected_by_css.txt] /:*:_dw::*:_nl::*:classifier_reason::*:_nl: - armor-pierced and decommented text was -:*:_nl::*:m_text::*:_nl:-0-0-0-0-0-0-0-0-0-0-0-0-0-:*:_nl::*:_nl:/
	goto /:looks_bad:/
}
#
#
#     Final wrap-up routines - dispose of the mail as appropriate.
#
{
	:looks_bad:
	#
	#     Remove line feeds from the status message
	#
	isolate (:header_reason:)
	alter (:header_reason:) /:*:classifier_reason:/
	{
		#    replace each run of newlines with a single space; loop
		#    until no newline is left
		match [:header_reason:] (:n:) /:*:_nl:+/
		alter (:n:) / /
		liaf
	}
	#
	#     Insert the status message as a header, right after the
	#     first line (the From line) of the headers.  This assumes
	#     we have more than one header line.
	#
	{
		match [:_dw:] (:secondlinestart:) /:*:_nl:[[:alpha:]]/
		alter (:secondlinestart:) /:*:_nl:X-CRM114-Status: :*:header_reason::*:secondlinestart:/
	}
	#     the tagged mail is still emitted; rejection is signalled
	#     through the exit code only
	accept
	alter (:our_exit_code:) /:*:rejected_mail_exit_code:/
	goto /:finish_up:/
}
#
#     and here's where we accept something as good email.
{
	:looks_good:
	#
	#     Remove line feeds from the status message
	#
	isolate (:header_reason:)
	alter (:header_reason:) /:*:classifier_reason:/
	{
		#    replace each run of newlines with a single space; loop
		#    until no newline is left
		match [:header_reason:] (:n:) /:*:_nl:+/
		alter (:n:) / /
		liaf
	}
	#
	#     Insert the status message as a header, right after the
	#     first line (the From line) of the headers.  This assumes
	#     we have more than one header line.
	#
	{
		match [:_dw:] (:secondlinestart:) /:*:_nl:[[:alpha:]]/
		alter (:secondlinestart:) /:*:_nl:X-CRM114-Status: :*:header_reason::*:secondlinestart:/
	}
	accept
	alter (:our_exit_code:) /0/
	goto /:finish_up:/
}
#
#     Here's where we finish up processing in all the paths.
#
:finish_up:
{
	#   ---- should we consider automatic training?
	match [:automatic_training:] /yes/
	#   bounce out if we've already auto-trained this email:
	#   <absent> makes this statement succeed only when the marker
	#   is NOT in the mail, so marked mail fails out of the block.
	match <absent> /AUTO-TRAINED/
	isolate (:msghash:)
	hash (:msghash:) /:*:_dw:/
	#   pick one in 16 - here, if the second-to-last digit is a 0
	match [:msghash:] /......0./
	#
	#   output autotraining...
	#   Yep... we should use this for autotraining
	#   do we auto-train on acceptance?
	{
		match [:classifier_reason:] /^ACCEPT/
		#   it wasn't spam... autotrain it "nonspam"
		#   Log the same text that is learned.  (:text: is only bound
		#   in the command branches, which all exit before this point.)
		#   NOTE(review): :m_text: is only filled in on the classifier
		#   path; for white/black-listed mail it is still empty here.
		output [nonspamtext.txt] /:*:m_text:/
		learn (nonspam.css) [:m_text:] /:*:lcr:/
		goto /:autotrain_finish:/
	}
	#   or do we autotrain on rejection
	{
		match [:classifier_reason:] /^REJECT/
		#   it was spam... autotrain it "spam"
		output [spamtext.txt] /:*:m_text:/
		learn (spam.css) [:m_text:] /:*:lcr:/
		goto /:autotrain_finish:/
	}
	:autotrain_finish:
	#   mail the verdict plus the message to the autotrain address
	syscall (:*:classifier_reason: :*:_nl: :*:_dw:) /mail -s "AUTO-TRAINED email - please check" :*:autotrain_address:/
}
:exit_here:
exit /:*:our_exit_code:/