pi2bz: implement more accurate trigger for commands

We use pi_query to find interesting threads, but it can produce false
positives (e.g. someone merely discussing the bugbot trigger words
rather than actually issuing a bot command).

This implements pi_trigger_regexes so we can tell real commands (issued
on a line of their own) apart from false positives.

Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
diff --git a/TODO.otl b/TODO.otl
new file mode 100644
index 0000000..055f0bc
--- /dev/null
+++ b/TODO.otl
@@ -0,0 +1,14 @@
+Bugzilla-to-PI
+----------------------------------------
+[_] Newly added comments without an (In reply to) should thread the latest comment
+[_] Collect all recipients for the initial bug tracking notification
+
+
+PI-to-Bugzilla
+----------------------------------------
+[X] Trigger bugbot on more precise phrases
+
+
+Documentation
+----------------------------------------
+[_] Write basic README
diff --git a/default.config.toml b/default.config.toml
index 993b9af..38905c4 100644
--- a/default.config.toml
+++ b/default.config.toml
@@ -33,7 +33,9 @@
 pi_query = '(nq:"bugbot on" OR nq:"bugbot assign")'
 pi_must_bz_groups = ['editbugs']
 pi_url = 'https://lore.kernel.org/all/'
-pi_assign_regex = '^bugbot assign to (\S+)'
+# These regexes are always matched in multiline, case-insensitive mode
+pi_trigger_regexes = ['^bugbot on\s*$', '^bugbot assign to \S+$']
+pi_assign_regexes = ['^bugbot assign to (\S+)']
 bz_new_bugs_quicksearch = 'OPEN flag:bugbot+'
 bz_privacy_mode = true
 alwayscc = ['bugs@lists.linux.dev']
diff --git a/peebz/__init__.py b/peebz/__init__.py
index d9dc238..eb24111 100644
--- a/peebz/__init__.py
+++ b/peebz/__init__.py
@@ -636,8 +636,7 @@
     return author
 
 
-def msg_parse_for_bug(msg: email.message.EmailMessage) -> Tuple[str, Tuple[str, str], str, str, List[Dict]]:
-    msgid = b4.LoreMessage.get_clean_msgid(msg)
+def msg_get_payload(msg: email.message.EmailMessage, strip_quoted: bool = False) -> str:
     mp = msg.get_body(preferencelist=('plain',))
     bbody = mp.get_payload(decode=True)
     cs = mp.get_content_charset()
@@ -647,6 +646,19 @@
     # Strip signature if we find it
     chunks = cpay.rsplit('\n-- \n', maxsplit=1)
     cbody = chunks[0]
+    if not strip_quoted:
+        return cbody
+
+    stripped = list()
+    for line in cbody.splitlines():
+        if not line.startswith('> '):
+            stripped.append(line)
+    return '\n'.join(stripped)
+
+
+def msg_parse_for_bug(msg: email.message.EmailMessage) -> Tuple[str, Tuple[str, str], str, str, List[Dict]]:
+    msgid = b4.LoreMessage.get_clean_msgid(msg)
+    cbody = msg_get_payload(msg)
     lsub = b4.LoreSubject(msg.get('Subject', ''))
     subject = lsub.subject
     atts = msg_get_valid_attachments(msg)
diff --git a/peebz/parse.py b/peebz/parse.py
index 07de9e5..f6785cc 100644
--- a/peebz/parse.py
+++ b/peebz/parse.py
@@ -9,7 +9,7 @@
 import b4
 import re
 
-from typing import Tuple, Dict
+from typing import Tuple, Dict, List, Optional
 
 import email.message
 import email.utils
@@ -116,6 +116,31 @@
     return cid
 
 
+def get_assignee(msg: email.message.EmailMessage, regexes: List[str]) -> Optional[str]:
+    # Return the first assignee captured by one of the regexes that bz_get_user accepts, else None
+    payload = peebz.msg_get_payload(msg)
+    fromaddr = peebz.msg_get_author(msg)[1]
+    assignee = None
+    for regex in regexes:
+        matches = re.search(regex, payload, flags=re.I | re.M)
+        if matches:
+            assignee = matches.groups()[0]
+            # 'me' stands for the message author; regexes match case-insensitively, so accept any case
+            if assignee and assignee.lower() == 'me':
+                logger.debug('me=%s', fromaddr)
+                assignee = fromaddr
+            # Does this user exist?
+            try:
+                peebz.bz_get_user(assignee)
+                logger.debug('found assignee=%s (matched regex: %s)', assignee, regex)
+                # First match wins
+                break
+            except LookupError:
+                logger.info('Unable to assign to %s: no such user', assignee)
+                assignee = None
+    return assignee
+
+
 def process_rfc2822(msg: email.message.EmailMessage, product: str, component: str,
                     dry_run: bool = False) -> None:
     # Ignore any messages that have an X-Bugzilla-Product header,
@@ -159,28 +184,21 @@
                 new_bug_notification(bid, cid, dry_run=dry_run)
 
     # Do we have any assign triggers?
-    assign_re = cconf.get('pi_assign_regex')
-    if assign_re:
-        matches = re.search(assign_re, msg.as_string(), flags=re.I | re.M)
-        if matches:
+    assign_res = cconf.get('pi_assign_regexes')
+    if assign_res:
+        assignee = get_assignee(msg, assign_res)
+        if assignee:
+            # Is this person allowed to set assignees?
             author = peebz.msg_get_author(msg)
             fromaddr = author[1]
             if peebz.bz_check_user_allowed(fromaddr, product, component):
-                assign_to = matches.groups()[0]
-                if assign_to == 'me':
-                    logger.debug('me=%s', fromaddr)
-                    assign_to = fromaddr
-                # Does this user exist?
-                try:
-                    peebz.bz_get_user(assign_to)
-                    if not dry_run:
-                        peebz.bz_assign_bug(bid, assign_to)
-                    else:
-                        logger.debug('---DRY RUN---')
-                        logger.debug('Would have assigned bid=%s to %s', bid, assign_to)
-
-                except LookupError:
-                    logger.info('Unable to assign %s to %s: no such user', bid, assign_to)
+                if not dry_run:
+                    peebz.bz_assign_bug(bid, assignee)
+                else:
+                    logger.debug('---DRY RUN---')
+                    logger.debug('Would have assigned bid=%s to %s', bid, assignee)
+            else:
+                logger.debug('User %s is not allowed to set assignees', fromaddr)
 
 
 def main(cmdargs: argparse.Namespace) -> None:
diff --git a/peebz/pi2bz.py b/peebz/pi2bz.py
index 36f07d3..240f64c 100644
--- a/peebz/pi2bz.py
+++ b/peebz/pi2bz.py
@@ -11,6 +11,7 @@
 import email.message
 import gzip
 import datetime
+import re
 
 from typing import List, Set
 
@@ -153,8 +154,24 @@
                 author = peebz.msg_get_author(msg)
                 fromaddr = author[1]
                 if not peebz.bz_check_user_allowed(fromaddr, product, component):
-                    logger.debug('skipping msg %s', msg.get('Subject'))
+                    logger.debug('author=%s not allowed, skipping msg %s', fromaddr, msg.get('Subject'))
                     continue
+                # Check fine trigger, if configured
+                trigger_res = cconf.get('pi_trigger_regexes', list())
+                if trigger_res:
+                    payload = peebz.msg_get_payload(msg)
+                    found = False
+                    for trigger_re in trigger_res:
+                        matches = re.search(trigger_re, payload, flags=re.I | re.M)
+                        if matches:
+                            logger.debug('found trigger_regex: %s', trigger_re)
+                            found = True
+                            break
+
+                    if not found:
+                        logger.debug('trigger_regexes not found, skipping msg %s', msg.get('Subject'))
+                        continue
+
                 # Retrieve and queue up the entire thread
                 try:
                     tmsgs = get_sorted_thread(url, msgid)