diff --git a/nlp.py b/nlp.py
index 036e929..fd126c3 100644
--- a/nlp.py
+++ b/nlp.py
@@ -1,6 +1,3 @@
-
-
-import json
 import pandas as pd
 import nltk
 import numpy as np
@@ -9,22 +6,14 @@ from nltk.stem import WordNetLemmatizer, SnowballStemmer
 from gensim.parsing.preprocessing import STOPWORDS
 from gensim.utils import simple_preprocess
 import gensim
-
-
 from sklearn.datasets import fetch_20newsgroups
-newsgroups_train = fetch_20newsgroups(subset='train', shuffle=True)
-newsgroups_test = fetch_20newsgroups(subset='test', shuffle=True)
+
+newsgroups_train = fetch_20newsgroups(subset='train')
+newsgroups_test = fetch_20newsgroups(subset='test')
 
 np.random.seed(400)
-
-
 stemmer = SnowballStemmer("english")
-
-# context = ssl._create_unverified_context()
-
-
-def get_data():
-    pass
+NUM_TOPICS = 7
 
 
 def lemmatize_stemming(text):
@@ -41,11 +30,13 @@ def preprocess(text):
     return result
 
 
-def categorize_str(s: str) -> int:
+def categorize_str(s: str, lda_model) -> int:
     """
     Takes in a string to determine which topic it belongs to
     Returns the topic number as an int
     """
-    bow_vector = dictionary.doc2bow(preprocess(s))
+    processed_doc = preprocess(s)
+    # reuse the model's own dictionary so token ids match the trained topics
+    bow_vector = lda_model.id2word.doc2bow(processed_doc)
     ldaResults = sorted(lda_model[bow_vector], key=lambda tup: -1*tup[1])
     return ldaResults[0][0]
@@ -54,25 +45,32 @@ def categorize_str(s: str) -> int:
 def create_model(documents: list):
     """
     Takes a list of strings to create model
-    returns the lda model and dictionary
+    returns the lda model
     """
     processed_docs = []
-    for doc in newsgroups_train.data:
+    for doc in documents:
         processed_docs.append(preprocess(doc))
     dictionary = gensim.corpora.Dictionary(processed_docs)
     bow_corpus = [dictionary.doc2bow(doc) for doc in processed_docs]
     lda_model = gensim.models.LdaMulticore(bow_corpus,
-                                           num_topics=7,
+                                           num_topics=NUM_TOPICS,
                                            id2word=dictionary,
                                            passes=10,
                                            workers=2)
-    return(lda_model, dictionary)
+    return lda_model
 
 
-lda_model, dictionary = create_model(newsgroups_train.data)
-for idx, topic in lda_model.show_topics(formatted=False, num_words=30):
-    print('Topic: {} \nWords: {}'.format(idx, [w[0] for w in topic]))
+def update_model(s: str, lda_model):
+    """
+    Takes in a string and performs an online update
+    of the model with that single document
+    """
+    processed_doc = preprocess(s)
+    # reuse the model's dictionary; a fresh one would produce mismatched token ids
+    bow_corpus = [lda_model.id2word.doc2bow(processed_doc)]
+    lda_model.update(bow_corpus)
 
-for ind in range(len(newsgroups_test)):
-    unseenDoc = newsgroups_test.data[ind]
-    print(ind, categorize_str(unseenDoc))
+
+# lda_model = create_model(newsgroups_train.data)
+# update_model("Hello everyone", lda_model)
+# print(categorize_str("Hello world", lda_model))
diff --git a/selector.py b/selector.py
new file mode 100644
index 0000000..422483a
--- /dev/null
+++ b/selector.py
@@ -0,0 +1,17 @@
+import nlp
+import random
+
+# get user preference from the database (e.g. how many times they clicked on a given type of article)
+# prob = [1/nlp.NUM_TOPICS for i in range(nlp.NUM_TOPICS)]
+# manipulate prob based on user preference
+
+
+def get_topics(weights, num_recommendations):
+    """
+    Takes in weights as a list/tuple, e.g. (0.1, 0.2, 0.3)
+    Returns a list of recommended topic numbers
+    """
+    return random.choices([*range(nlp.NUM_TOPICS)], weights, k=num_recommendations)
+
+
+# print(get_topics(prob, 4))
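
For reviewers, a minimal sketch of how the two modules are meant to compose after this change. The uniform prior and the +0.5 preference bump are illustrative assumptions standing in for the database-backed click counts that selector.py's comments mention but this PR does not implement yet.

```python
# Sketch only: the preference weights below are assumed, not part of this PR.
import nlp
import selector

# Train once on the 20 newsgroups training split (slow: 10 passes).
lda_model = nlp.create_model(nlp.newsgroups_train.data)

# Classify an unseen document into one of the NUM_TOPICS topics.
doc = nlp.newsgroups_test.data[0]
topic = nlp.categorize_str(doc, lda_model)
print("predicted topic:", topic)

# Optionally fold the new document back into the model online.
nlp.update_model(doc, lda_model)

# Start from a uniform preference, bump the topic the user just read,
# then sample 4 recommendations biased toward it.
prob = [1 / nlp.NUM_TOPICS] * nlp.NUM_TOPICS
prob[topic] += 0.5  # assumed bump size, purely illustrative
print(selector.get_topics(prob, 4))
```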