From b3f880986311784b50683c90f773c0ded9a77f67 Mon Sep 17 00:00:00 2001
From: REDNBLACK
Date: Wed, 28 Dec 2016 21:01:37 +0300
Subject: [PATCH] #19 Fixes

---
 src/config.py                      |  9 +++------
 src/repository/base_repository.py  |  2 +-
 src/repository/job_repository.py   |  3 +--
 src/service/__init__.py            |  4 ++++
 src/service/data_learner.py        |  4 ++--
 src/service/reply_generator.py     | 19 +++++++------------
 src/{service => }/tokenizer.py     |  1 -
 7 files changed, 18 insertions(+), 24 deletions(-)
 rename src/{service => }/tokenizer.py (96%)

diff --git a/src/config.py b/src/config.py
index cb9fea4..52ee45e 100644
--- a/src/config.py
+++ b/src/config.py
@@ -34,8 +34,8 @@ for section, options in sections.items():
 from src.redis_c import Redis
 redis = Redis(config)
 
-from src.service.tokenizer import Tokenizer
-tokenz = Tokenizer()
+from src.tokenizer import Tokenizer
+tokenizer = Tokenizer()
 
 from src.repository import *
 trigram_repository = TrigramRepository()
@@ -43,10 +43,7 @@ chance_repository = ChanceRepository()
 media_repository = MediaRepository()
 job_repository = JobRepository()
 
-from src.service.data_learner import DataLearner
-from src.service.reply_generator import ReplyGenerator
-from src.service.media_uniqueness_checker import MediaUniquenessChecker
-from src.service.chat_purge_queue import ChatPurgeQueue
+from src.service import *
 data_learner = DataLearner()
 reply_generator = ReplyGenerator()
 media_checker = MediaUniquenessChecker()
diff --git a/src/repository/base_repository.py b/src/repository/base_repository.py
index 4d307d6..3acd188 100644
--- a/src/repository/base_repository.py
+++ b/src/repository/base_repository.py
@@ -1,4 +1,4 @@
-from abc import ABC, abstractmethod
+from abc import ABC
 
 
 class BaseRepository(ABC):
diff --git a/src/repository/job_repository.py b/src/repository/job_repository.py
index 39c8604..ac33c48 100644
--- a/src/repository/job_repository.py
+++ b/src/repository/job_repository.py
@@ -1,12 +1,11 @@
 from . import RedisRepository
-from src.config import redis, encoding
+from src.config import encoding
 import json
 
 
 class JobRepository(RedisRepository):
     def __init__(self):
         RedisRepository.__init__(self, source_name='jobs')
-        self.redis = redis
 
     def add(self, chat_id, datetime):
         self.redis.instance().hset(
diff --git a/src/service/__init__.py b/src/service/__init__.py
index e69de29..fb2d6ed 100644
--- a/src/service/__init__.py
+++ b/src/service/__init__.py
@@ -0,0 +1,4 @@
+from .data_learner import DataLearner
+from .reply_generator import ReplyGenerator
+from .chat_purge_queue import ChatPurgeQueue
+from .media_uniqueness_checker import MediaUniquenessChecker
diff --git a/src/service/data_learner.py b/src/service/data_learner.py
index 61ef931..fe3393f 100644
--- a/src/service/data_learner.py
+++ b/src/service/data_learner.py
@@ -1,10 +1,10 @@
-from src.config import trigram_repository, tokenz
+from src.config import trigram_repository, tokenizer
 
 
 class DataLearner:
     def __init__(self):
         self.trigram_repository = trigram_repository
-        self.tokenizer = tokenz
+        self.tokenizer = tokenizer
 
     def learn(self, message):
         words = self.tokenizer.extract_words(message)
diff --git a/src/service/reply_generator.py b/src/service/reply_generator.py
index 9f6bccb..21261ed 100644
--- a/src/service/reply_generator.py
+++ b/src/service/reply_generator.py
@@ -1,11 +1,11 @@
-from src.config import config, redis, tokenz, trigram_repository
+from src.config import config, redis, tokenizer, trigram_repository
 from src.utils import strings_has_equal_letters, capitalize
 
 
 class ReplyGenerator:
     def __init__(self):
         self.redis = redis
-        self.tokenizer = tokenz
+        self.tokenizer = tokenizer
         self.trigram_repository = trigram_repository
 
         self.max_words = config.getint('grammar', 'max_words')
@@ -16,20 +16,15 @@ class ReplyGenerator:
         self.end_sentence = config['grammar']['end_sentence']
 
     def generate(self, message):
-        messages = []
-
         words = self.tokenizer.extract_words(message)
-        for trigram in self.tokenizer.split_to_trigrams(words):
-            pair = trigram[:-1]
+        pairs = [trigram[:-1] for trigram in self.tokenizer.split_to_trigrams(words)]
+        messages = [self.__generate_best_message(chat_id=message.chat_id, pair=pair) for pair in pairs]
+        longest_message = max(messages, key=len) if len(messages) else ''
 
-            messages.append(self.__generate_best_message(chat_id=message.chat_id, pair=pair))
-
-        result = max(messages, key=len) if len(messages) else ''
-
-        if strings_has_equal_letters(result, ''.join(words)):
+        if longest_message and strings_has_equal_letters(longest_message, ''.join(words)):
             return ''
 
-        return result
+        return longest_message
 
     def __generate_best_message(self, chat_id, pair):
         best_message = ''
diff --git a/src/service/tokenizer.py b/src/tokenizer.py
similarity index 96%
rename from src/service/tokenizer.py
rename to src/tokenizer.py
index 660e428..cf39532 100644
--- a/src/service/tokenizer.py
+++ b/src/tokenizer.py
@@ -6,7 +6,6 @@ from src.config import config
 class Tokenizer:
     def __init__(self):
         self.chain_length = config.getint('grammar', 'chain_length')
-        self.separator = config['grammar']['separator']
         self.stop_word = config['grammar']['stop_word']
         self.end_sentence = config['grammar']['end_sentence']
         self.garbage_tokens = config['grammar']['all']
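
Note: the new src/service/__init__.py re-exports the service classes so that
config.py can replace four per-module imports with a single star import. A
minimal, self-contained sketch of the pattern (module and class names here
are illustrative, not the project's):

    # pkg/__init__.py -- make the package itself the import surface
    from .learner import Learner        # hypothetical submodule
    from .generator import Generator    # hypothetical submodule

    # caller.py
    from pkg import *                   # binds Learner and Generator here
    learner = Learner()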
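The reply_generator hunk swaps the accumulation loop for comprehensions and
renames result to longest_message. A minimal sketch of the resulting flow,
with the tokenizer and the best-message lookup passed in as stand-ins (these
parameters are assumptions for illustration; in the patch they are instance
attributes, and strings_has_equal_letters comes from src.utils):

    def generate(message, words, tokenizer, generate_best):
        # Drop each trigram's last token to get its leading word pair.
        pairs = [trigram[:-1] for trigram in tokenizer.split_to_trigrams(words)]
        # One candidate reply per seed pair; keep the longest one.
        messages = [generate_best(pair) for pair in pairs]
        longest = max(messages, key=len) if messages else ''
        # Guard clause: suppress a reply that merely reshuffles the input's letters.
        if longest and strings_has_equal_letters(longest, ''.join(words)):
            return ''
        return longest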