mirror of
https://github.com/SkalaraAI/management-llm.git
synced 2025-04-03 20:10:20 -04:00
Add files via upload
This commit is contained in:
parent
75fcf820b5
commit
44a5802caa
999
ResumeClassification.ipynb
Normal file
999
ResumeClassification.ipynb
Normal file
|
@ -0,0 +1,999 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import pandas as pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>Category</th>\n",
|
||||
" <th>Resume</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>Data Science</td>\n",
|
||||
" <td>Skills * Programming Languages: Python (pandas...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>Data Science</td>\n",
|
||||
" <td>Education Details \\r\\nMay 2013 to May 2017 B.E...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>Data Science</td>\n",
|
||||
" <td>Areas of Interest Deep Learning, Control Syste...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>Data Science</td>\n",
|
||||
" <td>Skills ⢠R ⢠Python ⢠SAP HANA ⢠Table...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>Data Science</td>\n",
|
||||
" <td>Education Details \\r\\n MCA YMCAUST, Faridab...</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Category Resume\n",
|
||||
"0 Data Science Skills * Programming Languages: Python (pandas...\n",
|
||||
"1 Data Science Education Details \\r\\nMay 2013 to May 2017 B.E...\n",
|
||||
"2 Data Science Areas of Interest Deep Learning, Control Syste...\n",
|
||||
"3 Data Science Skills ⢠R ⢠Python ⢠SAP HANA ⢠Table...\n",
|
||||
"4 Data Science Education Details \\r\\n MCA YMCAUST, Faridab..."
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Dataset location: set the RESUME_DATASET_PATH environment variable to point at your copy of the CSV.\n",
|
||||
"# The fallback is the original hard-coded path, which only exists on the author's machine.\n",
|
||||
"DATA_PATH = os.environ.get('RESUME_DATASET_PATH', '/Users/sachithmankala/Skalara/UpdatedResumeDataset2.csv')\n",
|
||||
"df = pd.read_csv(DATA_PATH)\n",
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>Category</th>\n",
|
||||
" <th>Resume</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>957</th>\n",
|
||||
" <td>Testing</td>\n",
|
||||
" <td>Computer Skills: ⢠Proficient in MS office (...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>958</th>\n",
|
||||
" <td>Testing</td>\n",
|
||||
" <td>â Willingness to accept the challenges. â ...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>959</th>\n",
|
||||
" <td>Testing</td>\n",
|
||||
" <td>PERSONAL SKILLS ⢠Quick learner, ⢠Eagerne...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>960</th>\n",
|
||||
" <td>Testing</td>\n",
|
||||
" <td>COMPUTER SKILLS & SOFTWARE KNOWLEDGE MS-Power ...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>961</th>\n",
|
||||
" <td>Testing</td>\n",
|
||||
" <td>Skill Set OS Windows XP/7/8/8.1/10 Database MY...</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Category Resume\n",
|
||||
"957 Testing Computer Skills: ⢠Proficient in MS office (...\n",
|
||||
"958 Testing â Willingness to accept the challenges. â ...\n",
|
||||
"959 Testing PERSONAL SKILLS ⢠Quick learner, ⢠Eagerne...\n",
|
||||
"960 Testing COMPUTER SKILLS & SOFTWARE KNOWLEDGE MS-Power ...\n",
|
||||
"961 Testing Skill Set OS Windows XP/7/8/8.1/10 Database MY..."
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.tail()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 60,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Category object\n",
|
||||
"Resume object\n",
|
||||
"Cleaned Resume object\n",
|
||||
"Labels int64\n",
|
||||
"dtype: object"
|
||||
]
|
||||
},
|
||||
"execution_count": 60,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.dtypes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Show up to 800 characters per cell so long resume text is not truncated in displayed frames.\n",
|
||||
"pd.set_option('display.max_colwidth', 800)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>Category</th>\n",
|
||||
" <th>Resume</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>957</th>\n",
|
||||
" <td>Testing</td>\n",
|
||||
" <td>Computer Skills: ⢠Proficient in MS office (Word, Basic Excel, Power point) Strength: ⢠Hard working, Loyalty & Creativity ⢠Self-motivated, Responsible & Initiative ⢠Good people management skill & positive attitude. ⢠knowledge of windows, Internet.Education Details \\r\\n Bachelor of Electrical Engineering Electrical Engineering Nashik, Maharashtra Guru Gobind Singh College of Engineering and Research Centre\\r\\n Diploma Electrical Engineering Nashik, Maharashtra S. M. E. S. Polytechnic College\\r\\nTesting Engineer \\r\\n\\r\\n\\r\\nSkill Details \\r\\nEXCEL- Exprience - 6 months\\r\\nMS OFFICE- Exprience - 6 months\\r\\nWORD- Exprience - 6 monthsCompany Details \\r\\ncompany - \\r\\ndescription - Department: Testing\\r\\n\\r\\nResponsibilities: ⢠To check ACB and VCB of Circuit Breaker.\\r\\nâ...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>958</th>\n",
|
||||
" <td>Testing</td>\n",
|
||||
" <td>â Willingness to accept the challenges. â Positive thinking. â Good learner. â Team Player. DECLARATION: I hereby declare that the above mentioned information is correct up to my knowledge and I bear the responsibility for the correctness of the above mentioned particulars. Date: / / Name: Dongare Mandakini Murlidhar Signature: Education Details \\r\\nJune 2015 Electronics and Telecommunication Engineering Kolhapur, Maharashtra Shivaji University\\r\\nJune 2012 Education Secondary and Higher Secondary\\r\\n B.E. Electronics and Telecommunication Jaywant College of Engineering and Management\\r\\nTesting Engineer \\r\\n\\r\\nElectronics Engineer - Abacus Electronics Pvt Ltd\\r\\nSkill Details \\r\\nLanguage - C, C++- Exprience - Less than 1 year months\\r\\nOperating Systems- Windows 7-8/NT/X...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>959</th>\n",
|
||||
" <td>Testing</td>\n",
|
||||
" <td>PERSONAL SKILLS ⢠Quick learner, ⢠Eagerness to learn new things, ⢠Competitive attitude, ⢠Good leadership qualities, ⢠Ability to deal with people diplomatically. PERSONAL DOSSIER Fathers Name: Dhanraj WaghEducation Details \\r\\nJanuary 2012 to January 2016 Bachelors of Engineering Engineering Pune, Maharashtra Pune University\\r\\nJanuary 2012 Higher Secondary Certificate Nashik, Maharashtra SND College of Engineering and Research Center\\r\\nJanuary 2010 Secondary School Certificate Yeola, Maharashtra Swami Muktanand Jr. College\\r\\n HSC Maharashtra State Board\\r\\n SSC Maharashtra State Bard\\r\\n BE Rajapur, Uttar Pradesh Madhyamik Vidya Mandir Rajapur\\r\\nTesting and Quality Control Engineer \\r\\n\\r\\nTesting and Quality Control Engineer - M/S Rakesh Transformer Industries ...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>960</th>\n",
|
||||
" <td>Testing</td>\n",
|
||||
" <td>COMPUTER SKILLS & SOFTWARE KNOWLEDGE MS-Power Point, MS - Office, C, Protius (PCB Design), Multisim, Micro wind, Matlab, Keil, Latex, Basic I nternet Fundamentals, Software and Hardware Knowledge PROJECT DETAILS Diploma Project: Speed Control of DC Motor Using Heart Beats. Mini Project: Water Gardening System Using Solar Panel. Final Year BE Project: Iris Recognition system.Education Details \\r\\nJanuary 2016 BE EDUCATION Pune, Maharashtra PUNE University\\r\\nJanuary 2010 SSC Maharashtra Board\\r\\nQuality Engineer \\r\\n\\r\\nQuality Engineer - Matrix Technologies\\r\\nSkill Details \\r\\nMATLAB- Exprience - 6 months\\r\\nPCB- Exprience - 6 months\\r\\nPCB DESIGN- Exprience - 6 monthsCompany Details \\r\\ncompany - Matrix Technologies\\r\\ndescription - \\r\\ncompany - RB Electronics\\r\\ndescription -</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>961</th>\n",
|
||||
" <td>Testing</td>\n",
|
||||
" <td>Skill Set OS Windows XP/7/8/8.1/10 Database MYSQL, sql server 2005, 2008 & 2012 Languages Core Java Web Technology HTML, CSS Testing Manual Testing, Database Testing Other Bug tracking and reporting, End user handling.Education Details \\r\\nJanuary 2016 MCS Pune, Maharashtra P.V.P College Pravaranagar\\r\\nJanuary 2011 HSC A.K.Junior College\\r\\nJanuary 2009 SSC A.K.Mahavidyalya\\r\\nJR TESTING ENGINEER \\r\\n\\r\\nJR TESTING ENGINEER - M-Tech Innovations Ltd\\r\\nSkill Details \\r\\nTESTING- Exprience - 24 months\\r\\nWINDOWS XP- Exprience - 24 months\\r\\nCSS- Exprience - 6 months\\r\\nDATABASE- Exprience - 6 months\\r\\nDATABASE TESTING- Exprience - 6 monthsCompany Details \\r\\ncompany - M-Tech Innovations Ltd\\r\\ndescription - Responsibilities ⢠Analyzing the Testing Requirements ⢠Prepar...</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Category \\\n",
|
||||
"957 Testing \n",
|
||||
"958 Testing \n",
|
||||
"959 Testing \n",
|
||||
"960 Testing \n",
|
||||
"961 Testing \n",
|
||||
"\n",
|
||||
" Resume \n",
|
||||
"957 Computer Skills: ⢠Proficient in MS office (Word, Basic Excel, Power point) Strength: ⢠Hard working, Loyalty & Creativity ⢠Self-motivated, Responsible & Initiative ⢠Good people management skill & positive attitude. ⢠knowledge of windows, Internet.Education Details \\r\\n Bachelor of Electrical Engineering Electrical Engineering Nashik, Maharashtra Guru Gobind Singh College of Engineering and Research Centre\\r\\n Diploma Electrical Engineering Nashik, Maharashtra S. M. E. S. Polytechnic College\\r\\nTesting Engineer \\r\\n\\r\\n\\r\\nSkill Details \\r\\nEXCEL- Exprience - 6 months\\r\\nMS OFFICE- Exprience - 6 months\\r\\nWORD- Exprience - 6 monthsCompany Details \\r\\ncompany - \\r\\ndescription - Department: Testing\\r\\n\\r\\nResponsibilities: ⢠To check ACB and VCB of Circuit Breaker.\\r\\nâ... \n",
|
||||
"958 â Willingness to accept the challenges. â Positive thinking. â Good learner. â Team Player. DECLARATION: I hereby declare that the above mentioned information is correct up to my knowledge and I bear the responsibility for the correctness of the above mentioned particulars. Date: / / Name: Dongare Mandakini Murlidhar Signature: Education Details \\r\\nJune 2015 Electronics and Telecommunication Engineering Kolhapur, Maharashtra Shivaji University\\r\\nJune 2012 Education Secondary and Higher Secondary\\r\\n B.E. Electronics and Telecommunication Jaywant College of Engineering and Management\\r\\nTesting Engineer \\r\\n\\r\\nElectronics Engineer - Abacus Electronics Pvt Ltd\\r\\nSkill Details \\r\\nLanguage - C, C++- Exprience - Less than 1 year months\\r\\nOperating Systems- Windows 7-8/NT/X... \n",
|
||||
"959 PERSONAL SKILLS ⢠Quick learner, ⢠Eagerness to learn new things, ⢠Competitive attitude, ⢠Good leadership qualities, ⢠Ability to deal with people diplomatically. PERSONAL DOSSIER Fathers Name: Dhanraj WaghEducation Details \\r\\nJanuary 2012 to January 2016 Bachelors of Engineering Engineering Pune, Maharashtra Pune University\\r\\nJanuary 2012 Higher Secondary Certificate Nashik, Maharashtra SND College of Engineering and Research Center\\r\\nJanuary 2010 Secondary School Certificate Yeola, Maharashtra Swami Muktanand Jr. College\\r\\n HSC Maharashtra State Board\\r\\n SSC Maharashtra State Bard\\r\\n BE Rajapur, Uttar Pradesh Madhyamik Vidya Mandir Rajapur\\r\\nTesting and Quality Control Engineer \\r\\n\\r\\nTesting and Quality Control Engineer - M/S Rakesh Transformer Industries ... \n",
|
||||
"960 COMPUTER SKILLS & SOFTWARE KNOWLEDGE MS-Power Point, MS - Office, C, Protius (PCB Design), Multisim, Micro wind, Matlab, Keil, Latex, Basic I nternet Fundamentals, Software and Hardware Knowledge PROJECT DETAILS Diploma Project: Speed Control of DC Motor Using Heart Beats. Mini Project: Water Gardening System Using Solar Panel. Final Year BE Project: Iris Recognition system.Education Details \\r\\nJanuary 2016 BE EDUCATION Pune, Maharashtra PUNE University\\r\\nJanuary 2010 SSC Maharashtra Board\\r\\nQuality Engineer \\r\\n\\r\\nQuality Engineer - Matrix Technologies\\r\\nSkill Details \\r\\nMATLAB- Exprience - 6 months\\r\\nPCB- Exprience - 6 months\\r\\nPCB DESIGN- Exprience - 6 monthsCompany Details \\r\\ncompany - Matrix Technologies\\r\\ndescription - \\r\\ncompany - RB Electronics\\r\\ndescription - \n",
|
||||
"961 Skill Set OS Windows XP/7/8/8.1/10 Database MYSQL, sql server 2005, 2008 & 2012 Languages Core Java Web Technology HTML, CSS Testing Manual Testing, Database Testing Other Bug tracking and reporting, End user handling.Education Details \\r\\nJanuary 2016 MCS Pune, Maharashtra P.V.P College Pravaranagar\\r\\nJanuary 2011 HSC A.K.Junior College\\r\\nJanuary 2009 SSC A.K.Mahavidyalya\\r\\nJR TESTING ENGINEER \\r\\n\\r\\nJR TESTING ENGINEER - M-Tech Innovations Ltd\\r\\nSkill Details \\r\\nTESTING- Exprience - 24 months\\r\\nWINDOWS XP- Exprience - 24 months\\r\\nCSS- Exprience - 6 months\\r\\nDATABASE- Exprience - 6 months\\r\\nDATABASE TESTING- Exprience - 6 monthsCompany Details \\r\\ncompany - M-Tech Innovations Ltd\\r\\ndescription - Responsibilities ⢠Analyzing the Testing Requirements ⢠Prepar... "
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.tail()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Index(['Category', 'Resume'], dtype='object')"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.columns"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"25"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Number of distinct job categories in the dataset.\n",
|
||||
"df['Category'].nunique()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Java Developer 84\n",
|
||||
"Testing 70\n",
|
||||
"DevOps Engineer 55\n",
|
||||
"Python Developer 48\n",
|
||||
"Web Designing 45\n",
|
||||
"HR 44\n",
|
||||
"Hadoop 42\n",
|
||||
"Operations Manager 40\n",
|
||||
"Sales 40\n",
|
||||
"Blockchain 40\n",
|
||||
"Data Science 40\n",
|
||||
"Mechanical Engineer 40\n",
|
||||
"ETL Developer 40\n",
|
||||
"Arts 36\n",
|
||||
"Database 33\n",
|
||||
"PMO 30\n",
|
||||
"Health and fitness 30\n",
|
||||
"Electrical Engineering 30\n",
|
||||
"Business Analyst 28\n",
|
||||
"DotNet Developer 28\n",
|
||||
"Automation Testing 26\n",
|
||||
"Network Security Engineer 25\n",
|
||||
"Civil Engineer 24\n",
|
||||
"SAP Developer 24\n",
|
||||
"Advocate 20\n",
|
||||
"Name: Category, dtype: int64"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.Category.value_counts()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"RangeIndex: 962 entries, 0 to 961\n",
|
||||
"Data columns (total 2 columns):\n",
|
||||
" # Column Non-Null Count Dtype \n",
|
||||
"--- ------ -------------- ----- \n",
|
||||
" 0 Category 962 non-null object\n",
|
||||
" 1 Resume 962 non-null object\n",
|
||||
"dtypes: object(2)\n",
|
||||
"memory usage: 15.2+ KB\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.info()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Education Details \\r\\nMay 2013 to May 2017 B.E UIT-RGPV\\r\\nData Scientist \\r\\n\\r\\nData Scientist - Matelabs\\r\\nSkill Details \\r\\nPython- Exprience - Less than 1 year months\\r\\nStatsmodels- Exprience - 12 months\\r\\nAWS- Exprience - Less than 1 year months\\r\\nMachine learning- Exprience - Less than 1 year months\\r\\nSklearn- Exprience - Less than 1 year months\\r\\nScipy- Exprience - Less than 1 year months\\r\\nKeras- Exprience - Less than 1 year monthsCompany Details \\r\\ncompany - Matelabs\\r\\ndescription - ML Platform for business professionals, dummies and enthusiasts.\\r\\n60/A Koramangala 5th block,\\r\\nAchievements/Tasks behind sukh sagar, Bengaluru,\\r\\nIndia Developed and deployed auto preprocessing steps of machine learning mainly missing value\\r\\ntreatment, outlier detection, encoding, scaling, feature selection and dimensionality reduction.\\r\\nDeployed automated classification and regression model.\\r\\nlinkedin.com/in/aditya-rathore-\\r\\nb4600b146 Reasearch and deployed the time series forecasting model ARIMA, SARIMAX, Holt-winter and\\r\\nProphet.\\r\\nWorked on meta-feature extracting problem.\\r\\ngithub.com/rathorology\\r\\nImplemented a state of the art research paper on outlier detection for mixed attributes.\\r\\ncompany - Matelabs\\r\\ndescription - '"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df[\"Resume\"][1]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from string import punctuation\n",
|
||||
"print(punctuation)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import re\n",
|
||||
"\n",
|
||||
"def cleanResume(resumeText):\n",
|
||||
"    \"\"\"Return a lower-cased, ASCII-only copy of ``resumeText`` without punctuation or digits.\n",
|
||||
"\n",
|
||||
"    Punctuation and non-ASCII characters are each replaced by a space, digits are\n",
|
||||
"    deleted, and every run of whitespace is then collapsed to a single space.\n",
|
||||
"    Digits are removed *before* the whitespace pass so that deleting a standalone\n",
|
||||
"    number cannot leave two adjacent spaces behind.\n",
|
||||
"\n",
|
||||
"    Args:\n",
|
||||
"        resumeText (str): raw resume text.\n",
|
||||
"\n",
|
||||
"    Returns:\n",
|
||||
"        str: the cleaned, lower-cased text.\n",
|
||||
"    \"\"\"\n",
|
||||
"    # Raw strings keep the backslashes literal, avoiding invalid-escape-sequence warnings.\n",
|
||||
"    resumeText = re.sub('[%s]' % re.escape(r\"\"\"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~\"\"\"), ' ', resumeText)  # punctuation -> space\n",
|
||||
"    resumeText = re.sub(r'[^\\x00-\\x7f]', ' ', resumeText)  # non-ASCII characters -> space\n",
|
||||
"    resumeText = re.sub(r'[0-9]+', '', resumeText)  # remove numbers\n",
|
||||
"    resumeText = re.sub(r'\\s+', ' ', resumeText)  # collapse whitespace last, after all other removals\n",
|
||||
"    return resumeText.lower()\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"1104"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Add a cleaned copy of every resume next to the raw text, then check the length of one cleaned example.\n",
|
||||
"df[\"Cleaned Resume\"] = df[\"Resume\"].apply(cleanResume)\n",
|
||||
"len(df.loc[1, \"Cleaned Resume\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>Category</th>\n",
|
||||
" <th>Resume</th>\n",
|
||||
" <th>Cleaned Resume</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>Data Science</td>\n",
|
||||
" <td>Skills * Programming Languages: Python (pandas, numpy, scipy, scikit-learn, matplotlib), Sql, Java, JavaScript/JQuery. * Machine learning: Regression, SVM, Naïve Bayes, KNN, Random Forest, Decision Trees, Boosting techniques, Cluster Analysis, Word Embedding, Sentiment Analysis, Natural Language processing, Dimensionality reduction, Topic Modelling (LDA, NMF), PCA & Neural Nets. * Database Visualizations: Mysql, SqlServer, Cassandra, Hbase, ElasticSearch D3.js, DC.js, Plotly, kibana, matplotlib, ggplot, Tableau. * Others: Regular Expression, HTML, CSS, Angular 6, Logstash, Kafka, Python Flask, Git, Docker, computer vision - Open CV and understanding of Deep learning.Education Details \\r\\n\\r\\nData Science Assurance Associate \\r\\n\\r\\nData Science Assurance Associate - Ernst & Young LLP\\...</td>\n",
|
||||
" <td>skills programming languages python pandas numpy scipy scikit learn matplotlib sql java javascript jquery machine learning regression svm na ve bayes knn random forest decision trees boosting techniques cluster analysis word embedding sentiment analysis natural language processing dimensionality reduction topic modelling lda nmf pca neural nets database visualizations mysql sqlserver cassandra hbase elasticsearch d js dc js plotly kibana matplotlib ggplot tableau others regular expression html css angular logstash kafka python flask git docker computer vision open cv and understanding of deep learning education details data science assurance associate data science assurance associate ernst young llp skill details javascript exprience months jquery exprience months python exprience ...</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Category \\\n",
|
||||
"0 Data Science \n",
|
||||
"\n",
|
||||
" Resume \\\n",
|
||||
"0 Skills * Programming Languages: Python (pandas, numpy, scipy, scikit-learn, matplotlib), Sql, Java, JavaScript/JQuery. * Machine learning: Regression, SVM, Naïve Bayes, KNN, Random Forest, Decision Trees, Boosting techniques, Cluster Analysis, Word Embedding, Sentiment Analysis, Natural Language processing, Dimensionality reduction, Topic Modelling (LDA, NMF), PCA & Neural Nets. * Database Visualizations: Mysql, SqlServer, Cassandra, Hbase, ElasticSearch D3.js, DC.js, Plotly, kibana, matplotlib, ggplot, Tableau. * Others: Regular Expression, HTML, CSS, Angular 6, Logstash, Kafka, Python Flask, Git, Docker, computer vision - Open CV and understanding of Deep learning.Education Details \\r\\n\\r\\nData Science Assurance Associate \\r\\n\\r\\nData Science Assurance Associate - Ernst & Young LLP\\... \n",
|
||||
"\n",
|
||||
" Cleaned Resume \n",
|
||||
"0 skills programming languages python pandas numpy scipy scikit learn matplotlib sql java javascript jquery machine learning regression svm na ve bayes knn random forest decision trees boosting techniques cluster analysis word embedding sentiment analysis natural language processing dimensionality reduction topic modelling lda nmf pca neural nets database visualizations mysql sqlserver cassandra hbase elasticsearch d js dc js plotly kibana matplotlib ggplot tableau others regular expression html css angular logstash kafka python flask git docker computer vision open cv and understanding of deep learning education details data science assurance associate data science assurance associate ernst young llp skill details javascript exprience months jquery exprience months python exprience ... "
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.head(1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"179"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import nltk\n",
|
||||
"#nltk.download('stopwords')\n",
|
||||
"import string\n",
|
||||
"from nltk.corpus import stopwords\n",
|
||||
"from nltk import word_tokenize\n",
|
||||
"\n",
|
||||
"len(stopwords.words('english'))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Collect the non-stopword tokens of all Data Science resumes.\n",
|
||||
"ds_df = df[df.Category == 'Data Science']\n",
|
||||
"\n",
|
||||
"# Join with a space so the last word of one resume is not fused with the first word of the next.\n",
|
||||
"resumes = \" \".join(ds_df[\"Cleaned Resume\"])\n",
|
||||
"\n",
|
||||
"# Build the stopword set once instead of rebuilding it for every single token.\n",
|
||||
"english_stopwords = set(stopwords.words('english'))\n",
|
||||
"total_words = [\n",
|
||||
"    word\n",
|
||||
"    for resume in ds_df[\"Cleaned Resume\"]\n",
|
||||
"    for word in word_tokenize(resume)\n",
|
||||
"    if word not in english_stopwords and word not in string.punctuation\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.preprocessing import LabelEncoder\n",
|
||||
"\n",
|
||||
"# Encode each category name as an integer class label.\n",
|
||||
"encoder = LabelEncoder().fit(df['Category'])\n",
|
||||
"df['Labels'] = encoder.transform(df['Category'])\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To check that the labels were successfully converted to integers, let's run the pandas `info()` method one more time."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"RangeIndex: 962 entries, 0 to 961\n",
|
||||
"Data columns (total 4 columns):\n",
|
||||
" # Column Non-Null Count Dtype \n",
|
||||
"--- ------ -------------- ----- \n",
|
||||
" 0 Category 962 non-null object\n",
|
||||
" 1 Resume 962 non-null object\n",
|
||||
" 2 Cleaned Resume 962 non-null object\n",
|
||||
" 3 Labels 962 non-null int64 \n",
|
||||
"dtypes: int64(1), object(3)\n",
|
||||
"memory usage: 30.2+ KB\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.info()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"15 84\n",
|
||||
"23 70\n",
|
||||
"8 55\n",
|
||||
"20 48\n",
|
||||
"24 45\n",
|
||||
"Name: Labels, dtype: int64"
|
||||
]
|
||||
},
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.Labels.value_counts()[:5]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"`df.sample()` lets us look at two random resumes and their labels."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>Category</th>\n",
|
||||
" <th>Resume</th>\n",
|
||||
" <th>Cleaned Resume</th>\n",
|
||||
" <th>Labels</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>227</th>\n",
|
||||
" <td>Sales</td>\n",
|
||||
" <td>KEY SKILLS: ⢠Planning & Strategizing ⢠Presentation skill ⢠Client relationship ⢠Energy level ⢠Enquiry Generation ⢠Achieving Targets QUALIFICATIONS: A university in marketing or business studies is preferred or a minimum of three years of related experience in sales & marketing sector. Problem - solving and analytical skills to interpret sales performance and market trend information. Proven ability to motivate and lead the sales team. Experience in developing marketing and sales strategies. Excellent oral and written communication skills, plus a good working knowledge of Microsoft Office. Computer KNOWLEDGE ⢠Knowledge of MS Excel, MS Word, MS PowerPoint achievements and Interests ⢠I played Cricket for National Team (Maharashtra Cricket Association) ⢠Played Reg...</td>\n",
|
||||
" <td>key skills planning strategizing presentation skill client relationship energy level enquiry generation achieving targets qualifications a university in marketing or business studies is preferred or a minimum of three years of related experience in sales marketing sector problem solving and analytical skills to interpret sales performance and market trend information proven ability to motivate and lead the sales team experience in developing marketing and sales strategies excellent oral and written communication skills plus a good working knowledge of microsoft office computer knowledge knowledge of ms excel ms word ms powerpoint achievements and interests i played cricket for national team maharashtra cricket association played regional level cricket tournament thee times for school t...</td>\n",
|
||||
" <td>22</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>140</th>\n",
|
||||
" <td>Web Designing</td>\n",
|
||||
" <td>Technical Skills Web Technologies: Angular JS, HTML5, CSS3, SASS, Bootstrap, Jquery, Javascript. Software: Brackets, Visual Studio, Photoshop, Visual Studio Code Education Details \\r\\nJanuary 2015 B.E CSE Nagpur, Maharashtra G.H.Raisoni College of Engineering\\r\\nOctober 2009 Photography Competition Click Nagpur, Maharashtra Maharashtra State Board\\r\\n College Magazine OCEAN\\r\\nWeb Designer \\r\\n\\r\\nWeb Designer - Trust Systems and Software\\r\\nSkill Details \\r\\nPHOTOSHOP- Exprience - 28 months\\r\\nBOOTSTRAP- Exprience - 6 months\\r\\nHTML5- Exprience - 6 months\\r\\nJAVASCRIPT- Exprience - 6 months\\r\\nCSS3- Exprience - Less than 1 year months\\r\\nAngular 4- Exprience - Less than 1 year monthsCompany Details \\r\\ncompany - Trust Systems and Software\\r\\ndescription - Projects worked on:\\r\\n1....</td>\n",
|
||||
" <td>technical skills web technologies angular js html css sass bootstrap jquery javascript software brackets visual studio photoshop visual studio code education details january b e cse nagpur maharashtra g h raisoni college of engineering october photography competition click nagpur maharashtra maharashtra state board college magazine ocean web designer web designer trust systems and software skill details photoshop exprience months bootstrap exprience months html exprience months javascript exprience months css exprience less than year months angular exprience less than year monthscompany details company trust systems and software description projects worked on trustbank cbs project description trustbank cbs is a core banking solution by trust systems roles and responsibility r...</td>\n",
|
||||
" <td>24</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Category \\\n",
|
||||
"227 Sales \n",
|
||||
"140 Web Designing \n",
|
||||
"\n",
|
||||
" Resume \\\n",
|
||||
"227 KEY SKILLS: ⢠Planning & Strategizing ⢠Presentation skill ⢠Client relationship ⢠Energy level ⢠Enquiry Generation ⢠Achieving Targets QUALIFICATIONS: A university in marketing or business studies is preferred or a minimum of three years of related experience in sales & marketing sector. Problem - solving and analytical skills to interpret sales performance and market trend information. Proven ability to motivate and lead the sales team. Experience in developing marketing and sales strategies. Excellent oral and written communication skills, plus a good working knowledge of Microsoft Office. Computer KNOWLEDGE ⢠Knowledge of MS Excel, MS Word, MS PowerPoint achievements and Interests ⢠I played Cricket for National Team (Maharashtra Cricket Association) ⢠Played Reg... \n",
|
||||
"140 Technical Skills Web Technologies: Angular JS, HTML5, CSS3, SASS, Bootstrap, Jquery, Javascript. Software: Brackets, Visual Studio, Photoshop, Visual Studio Code Education Details \\r\\nJanuary 2015 B.E CSE Nagpur, Maharashtra G.H.Raisoni College of Engineering\\r\\nOctober 2009 Photography Competition Click Nagpur, Maharashtra Maharashtra State Board\\r\\n College Magazine OCEAN\\r\\nWeb Designer \\r\\n\\r\\nWeb Designer - Trust Systems and Software\\r\\nSkill Details \\r\\nPHOTOSHOP- Exprience - 28 months\\r\\nBOOTSTRAP- Exprience - 6 months\\r\\nHTML5- Exprience - 6 months\\r\\nJAVASCRIPT- Exprience - 6 months\\r\\nCSS3- Exprience - Less than 1 year months\\r\\nAngular 4- Exprience - Less than 1 year monthsCompany Details \\r\\ncompany - Trust Systems and Software\\r\\ndescription - Projects worked on:\\r\\n1.... \n",
|
||||
"\n",
|
||||
" Cleaned Resume \\\n",
|
||||
"227 key skills planning strategizing presentation skill client relationship energy level enquiry generation achieving targets qualifications a university in marketing or business studies is preferred or a minimum of three years of related experience in sales marketing sector problem solving and analytical skills to interpret sales performance and market trend information proven ability to motivate and lead the sales team experience in developing marketing and sales strategies excellent oral and written communication skills plus a good working knowledge of microsoft office computer knowledge knowledge of ms excel ms word ms powerpoint achievements and interests i played cricket for national team maharashtra cricket association played regional level cricket tournament thee times for school t... \n",
|
||||
"140 technical skills web technologies angular js html css sass bootstrap jquery javascript software brackets visual studio photoshop visual studio code education details january b e cse nagpur maharashtra g h raisoni college of engineering october photography competition click nagpur maharashtra maharashtra state board college magazine ocean web designer web designer trust systems and software skill details photoshop exprience months bootstrap exprience months html exprience months javascript exprience months css exprience less than year months angular exprience less than year monthscompany details company trust systems and software description projects worked on trustbank cbs project description trustbank cbs is a core banking solution by trust systems roles and responsibility r... \n",
|
||||
"\n",
|
||||
" Labels \n",
|
||||
"227 22 \n",
|
||||
"140 24 "
|
||||
]
|
||||
},
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.sample(2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(721,)\n",
|
||||
"(721,)\n",
|
||||
"(241,)\n",
|
||||
"(241,)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"\n",
|
||||
"text = df[\"Cleaned Resume\"].values\n",
|
||||
"labels = df[\"Labels\"].values\n",
|
||||
"\n",
|
||||
"text_train,text_test,y_train,y_test = train_test_split(text, labels, random_state=0, test_size=0.25, stratify=df.Labels)\n",
|
||||
"print(text_train.shape)\n",
|
||||
"print(y_train.shape)\n",
|
||||
"print(text_test.shape)\n",
|
||||
"print(y_test.shape)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Vectorization\n",
|
||||
"\n",
|
||||
"The simplest form of text vectorization is the Bag of Words (BoW) model. The scikit-learn library makes BoW easy to apply with CountVectorizer, TfidfVectorizer, and TfidfTransformer. Here we use TfidfVectorizer with its default tokenizer, remove English stop words, apply sublinear term-frequency scaling, and cap the vocabulary at 1,000 features."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
|
||||
"\n",
|
||||
"word_vectorizer = TfidfVectorizer(\n",
|
||||
" sublinear_tf=True,\n",
|
||||
" use_idf = True,\n",
|
||||
" stop_words='english',\n",
|
||||
" max_features=1000)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(721, 1000)"
|
||||
]
|
||||
},
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"X_train = word_vectorizer.fit_transform(text_train)\n",
|
||||
"X_train.shape"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(241, 1000)"
|
||||
]
|
||||
},
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"X_test = word_vectorizer.transform(text_test)\n",
|
||||
"X_test.shape"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Accuracy of MultinomialNB Classifier on training set: 0.99\n",
|
||||
"Accuracy of MultinomialNB Classifier on test set: 0.97\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from sklearn.naive_bayes import MultinomialNB\n",
|
||||
"from sklearn.multiclass import OneVsRestClassifier\n",
|
||||
"\n",
|
||||
"clf = OneVsRestClassifier(MultinomialNB()).fit(X_train, y_train)\n",
|
||||
"prediction_mnb = clf.predict(X_test)\n",
|
||||
"print('Accuracy of MultinomialNB Classifier on training set: {:.2f}'.format(clf.score(X_train, y_train)))\n",
|
||||
"print('Accuracy of MultinomialNB Classifier on test set: {:.2f}'.format(clf.score(X_test, y_test)))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 57,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"sample = [\"Implement a login page with form validation using React.js.\",\n",
|
||||
"\"Set up a RESTful API using Node.js and Express.js.\",\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\"Design a user-friendly dashboard layout with interactive charts and graphs.\",\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\"Implement authentication and authorization using JWT and bcrypt.\",\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\"Create a database schema for storing employee data and performance metrics.\",\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\"Design a responsive user interface with a modern and clean aesthetic.\",\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\"Implement data visualization using a charting library like D3.js.\",\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" \"Set up a MongoDB database for storing and querying analytics data.\",\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" \"Create wireframes for the employee profile page displaying key performance indicators.\",\n",
|
||||
"\n",
|
||||
"\"Implement CRUD operations for managing employee data using REST APIs.\",\n",
|
||||
"\n",
|
||||
" \"Design a visually appealing landing page with clear call-to-action buttons.\",\n",
|
||||
"\n",
|
||||
" \"Implement user authentication using Firebase Authentication.\",\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\"Set up a Next.js project with server-side rendering for improved performance.\",\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\"Design a mobile-friendly layout for seamless viewing on different devices.\",\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" \"Implement a search functionality for easily finding employee records.\"]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 59,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Implement a login page with form validation using React.js.\n",
|
||||
"Predicted labels: ['Java Developer']\n",
|
||||
"Set up a RESTful API using Node.js and Express.js.\n",
|
||||
"Predicted labels: ['Java Developer']\n",
|
||||
"Design a user-friendly dashboard layout with interactive charts and graphs.\n",
|
||||
"Predicted labels: ['Electrical Engineering']\n",
|
||||
"Implement authentication and authorization using JWT and bcrypt.\n",
|
||||
"Predicted labels: ['Testing']\n",
|
||||
"Create a database schema for storing employee data and performance metrics.\n",
|
||||
"Predicted labels: ['Hadoop']\n",
|
||||
"Design a responsive user interface with a modern and clean aesthetic.\n",
|
||||
"Predicted labels: ['Web Designing']\n",
|
||||
"Implement data visualization using a charting library like D3.js.\n",
|
||||
"Predicted labels: ['Java Developer']\n",
|
||||
"Set up a MongoDB database for storing and querying analytics data.\n",
|
||||
"Predicted labels: ['Python Developer']\n",
|
||||
"Create wireframes for the employee profile page displaying key performance indicators.\n",
|
||||
"Predicted labels: ['DevOps Engineer']\n",
|
||||
"Implement CRUD operations for managing employee data using REST APIs.\n",
|
||||
"Predicted labels: ['Python Developer']\n",
|
||||
"Design a visually appealing landing page with clear call-to-action buttons.\n",
|
||||
"Predicted labels: ['Testing']\n",
|
||||
"Implement user authentication using Firebase Authentication.\n",
|
||||
"Predicted labels: ['Testing']\n",
|
||||
"Set up a Next.js project with server-side rendering for improved performance.\n",
|
||||
"Predicted labels: ['Java Developer']\n",
|
||||
"Design a mobile-friendly layout for seamless viewing on different devices.\n",
|
||||
"Predicted labels: ['Testing']\n",
|
||||
"Implement a search functionality for easily finding employee records.\n",
|
||||
"Predicted labels: ['Testing']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for x in sample:\n",
|
||||
" sample_text = x\n",
|
||||
"\n",
|
||||
" # Vectorize the raw sample text with the TF-IDF vectorizer fitted on the training data (no cleaning step is applied to the sample here)\n",
|
||||
" vectorized_text = word_vectorizer.transform([sample_text]) # Assuming you have previously defined the vectorizer\n",
|
||||
"\n",
|
||||
" # Use the trained model to make predictions on the vectorized text\n",
|
||||
" predicted_labels = clf.predict(vectorized_text)\n",
|
||||
"\n",
|
||||
" # Convert the predicted labels back to their original format\n",
|
||||
" predicted_labels = encoder.inverse_transform(predicted_labels)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" # Print the predicted labels\n",
|
||||
" print(x)\n",
|
||||
" print(\"Predicted labels:\", predicted_labels)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
42105
UpdatedResumeDataset2.csv
Normal file
42105
UpdatedResumeDataset2.csv
Normal file
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user