discretized data

This commit is contained in:
Rushil Umaretiya 2021-10-04 15:21:33 +00:00
parent 176d4692b5
commit 7ef30cb5d1
4 changed files with 10403 additions and 10342 deletions

16
.gitignore vendored
View File

@ -1,3 +1,4 @@
.ipynb_checkpoints
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
@ -20,7 +21,6 @@ parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
@ -50,6 +50,7 @@ coverage.xml
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
@ -72,6 +73,7 @@ instance/
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
@ -82,7 +84,9 @@ profile_default/
ipython_config.py
# pyenv
.python-version
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
@ -127,3 +131,11 @@ dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
.vscode/

49
data_preprossing.ipynb Normal file
View File

@ -0,0 +1,49 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# Names for the salary quantile buckets, ordered from lowest to highest.\n",
"labels = [\"Very Low\", \"Low\", \"Mid\", \"High\", \"Very High\"]\n",
"\n",
"# Load the pipe-delimited, already-cleaned survey responses.\n",
"responses = pd.read_csv(\"responses_clean.csv\", sep=\"|\")\n",
"\n",
"# Bin SalaryUSD into equal-frequency buckets (one per label, i.e. quintiles)\n",
"# and replace the numeric column with the bucket label, keeping its position.\n",
"salary_bucket = pd.qcut(responses[\"SalaryUSD\"], q=len(labels), labels=labels)\n",
"df = responses.assign(SalaryUSD=salary_bucket)\n",
"\n",
"# NOTE(review): to_csv runs with pandas defaults, so the output is comma-separated\n",
"# (the input used '|') and includes the row index as an unnamed first column;\n",
"# confirm that downstream readers of this file expect that layout.\n",
"df.to_csv(\"responses_discretized.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (default)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

File diff suppressed because it is too large Load Diff

10340
responses_discretized.csv Normal file

File diff suppressed because it is too large Load Diff