REDNBLACK 2016-12-28 21:01:37 +03:00
parent 1852092393
commit b3f8809863
7 changed files with 18 additions and 24 deletions

View File

@@ -34,8 +34,8 @@ for section, options in sections.items():
 from src.redis_c import Redis
 redis = Redis(config)
-from src.service.tokenizer import Tokenizer
-tokenz = Tokenizer()
+from src.tokenizer import Tokenizer
+tokenizer = Tokenizer()
 from src.repository import *
 trigram_repository = TrigramRepository()
@@ -43,10 +43,7 @@ chance_repository = ChanceRepository()
 media_repository = MediaRepository()
 job_repository = JobRepository()
-from src.service.data_learner import DataLearner
-from src.service.reply_generator import ReplyGenerator
-from src.service.media_uniqueness_checker import MediaUniquenessChecker
-from src.service.chat_purge_queue import ChatPurgeQueue
+from src.service import *
 data_learner = DataLearner()
 reply_generator = ReplyGenerator()
 media_checker = MediaUniquenessChecker()
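
This hunk leans on import order: config.py binds the tokenizer and repository singletons before it wildcard-imports the services, and the service modules further down in this commit read those names back out of src.config. A condensed sketch of why that circular arrangement works (module names taken from the hunks in this commit):

    # src/config.py executes top to bottom; by the time the wildcard import
    # runs, the singletons already exist on the partially-initialized module,
    # so src/service/data_learner.py can safely do
    # "from src.config import trigram_repository, tokenizer".
    tokenizer = Tokenizer()
    trigram_repository = TrigramRepository()

    from src.service import *   # safe only because it comes after the bindings
    data_learner = DataLearner()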

View File

@@ -1,4 +1,4 @@
-from abc import ABC, abstractmethod
+from abc import ABC
 class BaseRepository(ABC):
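
Trimming abstractmethod is safe only if no method in BaseRepository is decorated with it, which this hunk suggests. Worth noting: inheriting from ABC alone does not prevent instantiation. A quick illustration of the difference:

    from abc import ABC, abstractmethod

    class Strict(ABC):
        @abstractmethod
        def save(self): ...   # Strict() raises TypeError until save is overridden

    class Loose(ABC):
        pass                  # Loose() instantiates fine: with no abstract
                              # methods, the abstractmethod import is dead code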

View File

@@ -1,12 +1,11 @@
 from . import RedisRepository
-from src.config import redis, encoding
+from src.config import encoding
 import json
 
 class JobRepository(RedisRepository):
     def __init__(self):
         RedisRepository.__init__(self, source_name='jobs')
-        self.redis = redis
 
     def add(self, chat_id, datetime):
         self.redis.instance().hset(
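
Since add() still calls self.redis.instance() after the removal above, the RedisRepository base class presumably binds the shared client itself now. A sketch of that assumed shape (attribute and constructor names inferred, not shown in this diff):

    # src/repository/redis_repository.py (assumed): the one place that still
    # touches the config-level connection after this commit.
    from src.config import redis

    class RedisRepository:
        def __init__(self, source_name):
            self.source_name = source_name
            self.redis = redis   # subclasses inherit the client instead of
                                 # importing it from src.config themselves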

View File

@@ -0,0 +1,4 @@
+from .data_learner import DataLearner
+from .reply_generator import ReplyGenerator
+from .chat_purge_queue import ChatPurgeQueue
+from .media_uniqueness_checker import MediaUniquenessChecker
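
A note on the wildcard semantics this initializer enables: without an __all__ list, "import *" re-exports every top-level name that does not start with an underscore, so all four classes reach config.py. Declaring __all__ would pin that surface down; a sketch (the list itself is hypothetical, not part of the commit):

    # src/service/__init__.py (optional hardening, not in this commit):
    __all__ = ['DataLearner', 'ReplyGenerator',
               'ChatPurgeQueue', 'MediaUniquenessChecker']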

View File

@@ -1,10 +1,10 @@
-from src.config import trigram_repository, tokenz
+from src.config import trigram_repository, tokenizer
 
 class DataLearner:
     def __init__(self):
         self.trigram_repository = trigram_repository
-        self.tokenizer = tokenz
+        self.tokenizer = tokenizer
 
     def learn(self, message):
         words = self.tokenizer.extract_words(message)
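
The hunk cuts off after the first line of learn(); given the tokenizer calls visible elsewhere in this commit, the remainder plausibly feeds trigrams into the repository. A sketch under that assumption (the repository method name is invented for illustration):

    def learn(self, message):
        words = self.tokenizer.extract_words(message)
        for trigram in self.tokenizer.split_to_trigrams(words):
            pair, continuation = trigram[:-1], trigram[-1]
            # store() is a placeholder name; the actual TrigramRepository
            # API is not shown in this diff.
            self.trigram_repository.store(message.chat_id, pair, continuation)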

View File

@@ -1,11 +1,11 @@
-from src.config import config, redis, tokenz, trigram_repository
+from src.config import config, redis, tokenizer, trigram_repository
 from src.utils import strings_has_equal_letters, capitalize
 
 class ReplyGenerator:
     def __init__(self):
         self.redis = redis
-        self.tokenizer = tokenz
+        self.tokenizer = tokenizer
         self.trigram_repository = trigram_repository
         self.max_words = config.getint('grammar', 'max_words')
@@ -16,20 +16,15 @@ class ReplyGenerator:
         self.end_sentence = config['grammar']['end_sentence']
 
     def generate(self, message):
-        messages = []
         words = self.tokenizer.extract_words(message)
-        for trigram in self.tokenizer.split_to_trigrams(words):
-            pair = trigram[:-1]
-            messages.append(self.__generate_best_message(chat_id=message.chat_id, pair=pair))
-        result = max(messages, key=len) if len(messages) else ''
-        if strings_has_equal_letters(result, ''.join(words)):
+        pairs = [trigram[:-1] for trigram in self.tokenizer.split_to_trigrams(words)]
+        messages = [self.__generate_best_message(chat_id=message.chat_id, pair=pair) for pair in pairs]
+        longest_message = max(messages, key=len) if len(messages) else ''
+        if longest_message and strings_has_equal_letters(longest_message, ''.join(words)):
             return ''
-        return result
+        return longest_message
 
     def __generate_best_message(self, chat_id, pair):
         best_message = ''
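
Beyond the comprehension rewrite, the hunk changes behavior in one small way: the new longest_message guard short-circuits when no trigram produced a candidate, so strings_has_equal_letters is never called with an empty string. Condensed, with the helper's semantics assumed (it presumably flags a reply that merely reshuffles the input's letters):

    longest_message = max(messages, key=len) if messages else ''   # "if messages" is the idiomatic spelling
    if longest_message and strings_has_equal_letters(longest_message, ''.join(words)):
        return ''              # reply is a letter-for-letter echo of the input: suppress it
    return longest_message     # '' also falls through here when nothing was generated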

View File

@@ -6,7 +6,6 @@ from src.config import config
 class Tokenizer:
     def __init__(self):
         self.chain_length = config.getint('grammar', 'chain_length')
-        self.separator = config['grammar']['separator']
         self.stop_word = config['grammar']['stop_word']
         self.end_sentence = config['grammar']['end_sentence']
         self.garbage_tokens = config['grammar']['all']
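
For orientation, a minimal sketch of how a chain_length-driven splitter usually produces the trigrams consumed by DataLearner and ReplyGenerator above; the real src/tokenizer.py implementation is not shown in this diff and may differ:

    def split_to_trigrams(self, words):
        # Slide a window of chain_length + 1 tokens over the sentence
        # (chain_length = 2 in a trigram model: a pair plus its continuation).
        size = self.chain_length + 1
        for i in range(len(words) - size + 1):
            yield tuple(words[i:i + size])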