[edit]
1 예제 #
# -*- coding: utf-8 -*-
"""
Train a Word2Vec model on chat messages read from a SQL Server table.

Created on Thu Feb 7 23:20:11 2019

@author: jhlee
"""
import sys

from gensim.models import Word2Vec
import pyodbc

# Top 10000 distinct messages (by occurrence count) for one date/hour slice.
# NOTE: date_key and hh are hard-coded. `sys` is imported for an optional
# command-line override of the date; if that is enabled, prefer a `?` query
# parameter passed to cursor.execute() over string substitution.
sql = """
    select top 10000
           msg
         , count(*) cnt
      from dbo.sentences
     where 1=1
       and date_key = '20190207'
       and hh = 14
     group by msg
     order by cnt desc
"""

# NOTE(review): user/password look like placeholders -- load real credentials
# from configuration or the environment rather than committing them.
conn = pyodbc.connect(driver='{SQL Server}', host='192.168.0.1',
                      database='gamelog', user='id', password='pw')
try:
    cursor = conn.cursor()
    try:
        cursor.execute(sql)
        # One token list per message: column 0 is `msg`, split on single
        # spaces. Rows whose msg is NULL carry no tokens and are skipped
        # instead of raising AttributeError on `.split`.
        sentences = [
            row[0].split(" ")
            for row in cursor.fetchall()
            if row[0] is not None
        ]
    finally:
        # Release the cursor even if the query or tokenizing fails.
        cursor.close()
finally:
    # The original script never closed the connection.
    conn.close()

# gensim 3.x API: `size` is the embedding dimension, and words appearing
# fewer than `min_count` times across `sentences` are dropped from the
# vocabulary.
# NOTE(review): gensim 4 renamed `size` to `vector_size` and deprecated
# `init_sims` -- confirm the installed version before upgrading.
model = Word2Vec(sentences, size=100, batch_words=10, min_count=50)

# Per the gensim docs, replace=True L2-normalizes the vectors in place and
# discards the raw ones, so the model cannot be trained further afterwards.
model.init_sims(replace=True)

# The result is only displayed in an interactive session; wrap the call in
# print() when running this file as a script.
model.wv.most_similar("병신")
[edit]
2 모델 저장/불러오기 #
# Save the trained model to disk, then load it back from the same path.
MODEL_PATH = "gold_dealer_model"

model.save(MODEL_PATH)
model = Word2Vec.load(MODEL_PATH)
[edit]
3 모델 어휘(vocabulary)에 있는 단어만 남기기 #
# Tokenize each sentence on single spaces and keep only the tokens present
# in the trained model's vocabulary.
# NOTE(review): `model.wv.vocab` is the gensim 3.x attribute -- confirm the
# installed version before upgrading gensim.
s1 = [token for token in s1.split(" ") if token in model.wv.vocab]
s2 = [token for token in s2.split(" ") if token in model.wv.vocab]
댓글 없음:
댓글 쓰기