[WIP] Issue #17
This commit is contained in:
parent
82ed71f23c
commit
e3b71d7204
|
@ -14,7 +14,7 @@ all=.!?;()\-—"[]{}«»/*&^#$
|
|||
[logging]
|
||||
level=INFO
|
||||
|
||||
[links]
|
||||
[media_checker]
|
||||
lifetime=28800.0
|
||||
stickers=BQADAgADGwEAAjbsGwVVGLVNyOWfuwI
|
||||
|
||||
|
@ -23,6 +23,8 @@ mode=polling
|
|||
host=
|
||||
port=
|
||||
url=
|
||||
key=
|
||||
cert=
|
||||
|
||||
[redis]
|
||||
host=
|
||||
|
|
|
@ -10,7 +10,7 @@ from src.handler.status_handler import StatusHandler
|
|||
from src.service.chat_purge_queue import ChatPurgeQueue
|
||||
from src.service.data_learner import DataLearner
|
||||
from src.service.reply_generator import ReplyGenerator
|
||||
from src.service.links_checker import LinksChecker
|
||||
from src.service.media_uniqueness_checker import MediaUniquenessChecker
|
||||
from src.service.chance_manager import ChanceManager
|
||||
|
||||
|
||||
|
@ -27,7 +27,7 @@ class Bot:
|
|||
|
||||
self.dispatcher.add_handler(MessageHandler(data_learner=DataLearner(),
|
||||
reply_generator=ReplyGenerator(),
|
||||
links_checker=LinksChecker(redis),
|
||||
media_checker=MediaUniquenessChecker(redis),
|
||||
chance_manager=chance_manager))
|
||||
self.dispatcher.add_handler(CommandHandler(chance_manager=chance_manager))
|
||||
self.dispatcher.add_handler(StatusHandler(chat_purge_queue=ChatPurgeQueue(self.updater.job_queue, redis)))
|
||||
|
|
|
@ -4,7 +4,8 @@ sections = {
|
|||
'bot': ['token', 'name', 'anchors', 'messages', 'purge_interval', 'default_chance', 'spam_stickers'],
|
||||
'grammar': ['end_sentence', 'all'],
|
||||
'logging': ['level'],
|
||||
'links': ['lifetime', 'stickers'],
|
||||
'updates': ['mode'],
|
||||
'media_checker': ['lifetime', 'stickers'],
|
||||
'redis': ['host', 'port', 'db'],
|
||||
'db': []
|
||||
}
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
import random
|
||||
import re
|
||||
from urllib.parse import urlparse
|
||||
from .abstract_entity import AbstractEntity
|
||||
from src.utils import deep_get_attr
|
||||
from src.config import config
|
||||
|
@ -14,11 +13,9 @@ class Message(AbstractEntity):
|
|||
|
||||
if self.has_text():
|
||||
self.text = message.text
|
||||
self.links = self.__get_links()
|
||||
self.words = self.__get_words()
|
||||
else:
|
||||
self.text = ''
|
||||
self.links = []
|
||||
self.words = []
|
||||
|
||||
def has_text(self):
|
||||
|
@ -41,9 +38,6 @@ class Message(AbstractEntity):
|
|||
"""
|
||||
return self.message.entities is not None
|
||||
|
||||
def has_links(self):
    """Tell whether any links were extracted from the message text."""
    return bool(self.links)
|
||||
|
||||
def has_anchors(self):
|
||||
"""Returns True if the message contains at least one anchor from anchors config.
|
||||
"""
|
||||
|
@ -73,23 +67,6 @@ class Message(AbstractEntity):
|
|||
or self.is_reply_to_bot() \
|
||||
or self.is_random_answer()
|
||||
|
||||
def __get_links(self):
    """Collect normalized links from the message's 'url' entities.

    Each raw URL is reduced to host + path + '#' + fragment + '?' + query,
    with the scheme dropped and only the last two hostname labels kept, so
    cosmetic variations of the same address produce the same key.
    """
    def normalize(raw):
        # urlparse only fills hostname when a scheme is present.
        if not raw.startswith('http://') and not raw.startswith('https://'):
            raw = 'http://' + raw

        parsed = urlparse(raw)
        host = '.'.join(parsed.hostname.split('.')[-2:])
        return '{}{}#{}?{}'.format(host, parsed.path, parsed.fragment, parsed.query)

    return [
        normalize(self.text[entity.offset:entity.offset + entity.length])
        for entity in self.message.entities
        if entity.type == 'url'
    ]
|
||||
|
||||
def __get_words(self):
|
||||
symbols = list(re.sub('\s', ' ', self.text))
|
||||
|
||||
|
|
|
@ -8,14 +8,17 @@ from src.domain.message import Message
|
|||
|
||||
|
||||
class MessageHandler(ParentHandler):
|
||||
def __init__(self, data_learner, reply_generator, links_checker, chance_manager):
|
||||
spam_stickers = config.getlist('bot', 'spam_stickers')
|
||||
media_checker_stickers = config.getlist('media_checker', 'stickers')
|
||||
|
||||
def __init__(self, data_learner, reply_generator, media_checker, chance_manager):
|
||||
super(MessageHandler, self).__init__(
|
||||
Filters.text | Filters.sticker,
|
||||
self.handle)
|
||||
|
||||
self.data_learner = data_learner
|
||||
self.reply_generator = reply_generator
|
||||
self.links_checker = links_checker
|
||||
self.media_checker = media_checker
|
||||
self.chance_manager = chance_manager
|
||||
|
||||
def handle(self, bot, update):
|
||||
|
@ -30,14 +33,16 @@ class MessageHandler(ParentHandler):
|
|||
self.__process_sticker(bot, message)
|
||||
|
||||
def __check_media_uniqueness(self, bot, message):
|
||||
if message.has_links() and self.links_checker.check(message.chat_id, message.links):
|
||||
if not message.is_private()\
|
||||
and message.has_entities()\
|
||||
and self.media_checker.check(message):
|
||||
logging.debug("[Chat %s %s not unique media]" %
|
||||
(message.chat_type,
|
||||
message.chat_id))
|
||||
|
||||
bot.send_sticker(chat_id=message.chat_id,
|
||||
reply_to_message_id=message.message.message_id,
|
||||
sticker=choice(config.getlist('links', 'stickers')))
|
||||
sticker=choice(self.media_checker_stickers))
|
||||
|
||||
def __process_message(self, bot, message):
|
||||
logging.debug("[Chat %s %s message length] %s" %
|
||||
|
@ -73,4 +78,4 @@ class MessageHandler(ParentHandler):
|
|||
|
||||
bot.send_sticker(chat_id=message.chat_id,
|
||||
reply_to_message_id=message.message.message_id,
|
||||
sticker=choice(config.getlist('bot', 'spam_stickers')))
|
||||
sticker=choice(self.spam_stickers))
|
||||
|
|
|
@ -1,26 +0,0 @@
|
|||
from datetime import datetime, timedelta
|
||||
from src.config import config
|
||||
|
||||
|
||||
class LinksChecker:
    """Tracks links recently posted per chat inside a redis sorted set.

    Each link is stored with its expiry timestamp as the score, which lets
    expired entries be pruned with a single range deletion.
    """

    def __init__(self, redis):
        """
        :param redis: project redis wrapper exposing ``instance()``.
        """
        self.redis = redis
        self.lifetime = timedelta(seconds=config.getfloat('links', 'lifetime'))
        self.key = "links:{}"

    def check(self, chat_id, links):
        """Returns True if at least one link already exists
        """

        client = self.redis.instance()
        chat_key = self.key.format(chat_id)
        current = datetime.now()
        expires_at = (current + self.lifetime).timestamp()

        # Prune everything whose expiry score is already in the past.
        client.zremrangebyscore(chat_key, 0, current.timestamp())

        # zadd reports 0 for members that were already present, i.e. links
        # posted within `lifetime` — exactly what this check looks for.
        pipeline = client.pipeline()
        for link in links:
            pipeline.zadd(chat_key, link, expires_at)

        return any(result == 0 for result in pipeline.execute())
|
|
@ -0,0 +1,45 @@
|
|||
from datetime import datetime, timedelta
|
||||
from src.config import config
|
||||
from urllib.parse import urlparse
|
||||
|
||||
|
||||
class MediaUniquenessChecker:
    """Detects repeated media (URL entities) in a chat.

    Normalized media identifiers are stored per chat in a redis sorted set
    whose scores are expiry timestamps, so stale entries can be pruned with
    a single range deletion.
    """

    def __init__(self, redis):
        """
        :param redis: project redis wrapper exposing ``instance()``.
        """
        self.redis = redis
        # Read config at construction time rather than at class-definition
        # (import) time: this keeps importing the module side-effect free
        # and matches how the sibling LinksChecker initializes its settings.
        self.lifetime = timedelta(seconds=config.getfloat('media_checker', 'lifetime'))
        self.key = "media_checker:{}"

    def check(self, message):
        """Returns True if at least one media entity was already in this chat
        """

        redis = self.redis.instance()
        key = self.key.format(message.chat_id)
        now = datetime.now()
        delete_at = (now + self.lifetime).timestamp()

        # Drop entries whose lifetime has already elapsed.
        redis.zremrangebyscore(key, 0, now.timestamp())

        # zadd returns 0 for members that already existed, so any 0 in the
        # pipeline results means the media was seen within `lifetime`.
        pipe = redis.pipeline()
        for element in self.__extract_media(message):
            pipe.zadd(key, element, delete_at)

        return any(x == 0 for x in pipe.execute())

    def __extract_media(self, message):
        """Normalize every 'url' entity of the message into a stable key.

        The key is host + path + '#' + fragment + '?' + query with the
        scheme dropped and only the last two hostname labels kept, so that
        cosmetic URL variations map to the same identifier.
        """
        links = []

        def prettify(url):
            # Telegram 'url' entities may lack a scheme; urlparse only
            # fills hostname when one is present.
            if not url.startswith('http://') and not url.startswith('https://'):
                url = 'http://' + url

            link = urlparse(url)
            host = '.'.join(link.hostname.split('.')[-2:])
            return '{}{}#{}?{}'.format(host, link.path, link.fragment, link.query)

        for entity in filter(lambda e: e.type == 'url', message.message.entities):
            link = prettify(message.text[entity.offset:entity.length + entity.offset])
            links.append(link)

        return links
|
Loading…
Reference in New Issue