Skip to content

Commit

Permalink
Merge pull request #36 from KjetilIN/21-feat-dataframe-for-reading-files
Browse files Browse the repository at this point in the history
Dataframe implementation
  • Loading branch information
KjetilIN authored Oct 3, 2024
2 parents 3ee23d0 + fa8a1ca commit ef5bd91
Show file tree
Hide file tree
Showing 15 changed files with 1,819 additions and 3 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,6 @@ Cargo.lock
/docs/*.gz
/docs/*.fdb_latexmk
/docs/*.out

# Ignore checkpoint folder
.ipynb_checkpoints/
2 changes: 1 addition & 1 deletion benches/matrix_benchmark.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use criterion::{self, black_box, criterion_group, criterion_main, Criterion};
use rustic_ml::matrix::Matrix;
use rustic_ml::data_utils::matrix::Matrix;

fn benchmark_matrix_multiplication(c: &mut Criterion) {
// Define matrix sizes and data
Expand Down
25 changes: 25 additions & 0 deletions datasets/european_cities.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
Barcelona;Belgrade;Berlin;Brussels;Bucharest;Budapest;Copenhagen;Dublin;Hamburg;Istanbul;Kyiv;London;Madrid;Milan;Moscow;Munich;Paris;Prague;Rome;Saint Petersburg;Sofia;Stockholm;Vienna;Warsaw
0;1528.13;1497.61;1062.89;1968.42;1498.79;1757.54;1469.29;1471.78;2230.42;2391.06;1137.67;504.64;725.12;3006.93;1054.55;831.59;1353.90;856.69;2813.02;1745.55;2276.51;1347.43;1862.33
1528.13;0;999.25;1372.59;447.34;316.41;1327.24;2145.39;1229.93;809.48;976.02;1688.97;2026.94;885.32;1710.99;773.33;1445.70;738.10;721.55;1797.75;329.46;1620.96;489.28;826.66
1497.61;999.25;0;651.62;1293.40;689.06;354.03;1315.16;254.51;1735.01;1204.00;929.97;1867.69;840.72;1607.99;501.97;876.96;280.34;1181.67;1319.62;1318.67;810.38;523.61;516.06
1062.89;1372.59;651.62;0;1769.69;1131.52;766.67;773.20;489.76;2178.85;1836.20;318.72;1314.30;696.61;2253.26;601.87;261.29;721.08;1171.34;1903.66;1697.83;1280.88;914.81;1159.85
1968.42;447.34;1293.40;1769.69;0;639.77;1571.54;2534.72;1544.17;445.62;744.44;2088.42;2469.71;1331.46;1497.56;1186.37;1869.95;1076.82;1137.38;1740.39;296.68;1742.25;855.32;946.12
1498.79;316.41;689.06;1131.52;639.77;0;1011.31;1894.95;927.92;1064.76;894.29;1450.12;1975.38;788.56;1565.19;563.93;1247.61;443.26;811.11;1556.51;629.63;1316.59;216.98;545.29
1757.54;1327.24;354.03;766.67;1571.54;1011.31;0;1238.38;287.97;2017.17;1326.33;955.13;2071.75;1157.89;1558.52;838.00;1025.90;633.05;1529.69;1143.40;1635.54;521.68;868.87;667.80
1469.29;2145.39;1315.16;773.20;2534.72;1894.95;1238.38;0;1073.36;2950.11;2513.69;462.60;1449.96;1413.37;2792.41;1374.91;776.83;1465.61;1882.22;2314.19;2471.02;1626.56;1680.00;1823.72
1471.78;1229.93;254.51;489.76;1544.17;927.92;287.97;1073.36;0;1983.75;1440.34;720.12;1785.33;900.01;1779.93;610.17;744.63;492.25;1307.51;1414.16;1554.82;809.65;742.79;750.49
2230.42;809.48;1735.01;2178.85;445.62;1064.76;2017.17;2950.11;1983.75;0;1052.95;2496.39;2734.60;1669.43;1753.97;1582.16;2253.98;1507.55;1373.81;2099.29;502.61;2171.65;1273.88;1386.08
2391.06;976.02;1204.00;1836.20;744.44;894.29;1326.33;2513.69;1440.34;1052.95;0;2131.20;2859.32;1672.69;756.61;1391.36;2022.76;1138.61;1673.74;1051.39;1020.76;1265.79;1052.76;690.12
1137.67;1688.97;929.97;318.72;2088.42;1450.12;955.13;462.60;720.12;2496.39;2131.20;0;1263.37;957.91;2498.32;916.23;340.55;1034.57;1431.21;2093.69;2012.70;1431.07;1233.48;1445.85
504.64;2026.94;1867.69;1314.30;2469.71;1975.38;2071.75;1449.96;1785.33;2734.60;2859.32;1263.37;0;1187.73;3437.70;1484.53;1053.40;1773.73;1360.80;3183.43;2250.10;2591.53;1807.09;2288.42
725.12;885.32;840.72;696.61;1331.46;788.56;1157.89;1413.37;900.01;1669.43;1672.69;957.91;1187.73;0;2283.19;348.89;641.31;646.04;476.00;2122.15;1166.83;1650.12;623.36;1143.01
3006.93;1710.99;1607.99;2253.26;1497.56;1565.19;1558.52;2792.41;1779.93;1753.97;756.61;2498.32;3437.70;2283.19;0;1957.15;2484.92;1664.04;2374.26;632.59;1777.35;1227.38;1669.22;1149.41
1054.55;773.33;501.97;601.87;1186.37;563.93;838.00;1374.91;610.17;1582.16;1391.36;916.23;1484.53;348.89;1957.15;0;685.14;300.16;698.04;1773.83;1096.54;1311.80;354.42;809.02
831.59;1445.70;876.96;261.29;1869.95;1247.61;1025.90;776.83;744.63;2253.98;2022.76;340.55;1053.40;641.31;2484.92;685.14;0;885.38;1105.76;2157.99;1758.03;1541.83;1033.73;1365.91
1353.90;738.10;280.34;721.08;1076.82;443.26;633.05;1465.61;492.25;1507.55;1138.61;1034.57;1773.73;646.04;1664.04;300.16;885.38;0;922.00;1476.73;1064.43;1052.85;250.71;514.69
856.69;721.55;1181.67;1171.34;1137.38;811.11;1529.69;1882.22;1307.51;1373.81;1673.74;1431.21;1360.80;476.00;2374.26;698.04;1105.76;922.00;0;2339.22;894.06;1974.79;763.26;1316.24
2813.02;1797.75;1319.62;1903.66;1740.39;1556.51;1143.40;2314.19;1414.16;2099.29;1051.39;2093.69;3183.43;2122.15;632.59;1773.83;2157.99;1476.73;2339.22;0;1969.82;688.33;1577.56;1023.41
1745.55;329.46;1318.67;1697.83;296.68;629.63;1635.54;2471.02;1554.82;502.61;1020.76;2012.70;2250.10;1166.83;1777.35;1096.54;1758.03;1064.43;894.06;1969.82;0;1884.91;817.45;1076.99
2276.51;1620.96;810.38;1280.88;1742.25;1316.59;521.68;1626.56;809.65;2171.65;1265.79;1431.07;2591.53;1650.12;1227.38;1311.80;1541.83;1052.85;1974.79;688.33;1884.91;0;1241.90;808.14
1347.43;489.28;523.61;914.81;855.32;216.98;868.87;1680.00;742.79;1273.88;1052.76;1233.48;1807.09;623.36;1669.22;354.42;1033.73;250.71;763.26;1577.56;817.45;1241.90;0;557.43
1862.33;826.66;516.06;1159.85;946.12;545.29;667.80;1823.72;750.49;1386.08;690.12;1445.85;2288.42;1143.01;1149.41;809.02;1365.91;514.69;1316.24;1023.41;1076.99;808.14;557.43;0
25 changes: 25 additions & 0 deletions datasets/european_cities.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
Barcelona Belgrade Berlin Brussels Bucharest Budapest Copenhagen Dublin Hamburg Istanbul Kyiv London Madrid Milan Moscow Munich Paris Prague Rome Saint Petersburg Sofia Stockholm Vienna Warsaw
0 1528.13 1497.61 1062.89 1968.42 1498.79 1757.54 1469.29 1471.78 2230.42 2391.06 1137.67 504.64 725.12 3006.93 1054.55 831.59 1353.90 856.69 2813.02 1745.55 2276.51 1347.43 1862.33
1528.13 0 999.25 1372.59 447.34 316.41 1327.24 2145.39 1229.93 809.48 976.02 1688.97 2026.94 885.32 1710.99 773.33 1445.70 738.10 721.55 1797.75 329.46 1620.96 489.28 826.66
1497.61 999.25 0 651.62 1293.40 689.06 354.03 1315.16 254.51 1735.01 1204.00 929.97 1867.69 840.72 1607.99 501.97 876.96 280.34 1181.67 1319.62 1318.67 810.38 523.61 516.06
1062.89 1372.59 651.62 0 1769.69 1131.52 766.67 773.20 489.76 2178.85 1836.20 318.72 1314.30 696.61 2253.26 601.87 261.29 721.08 1171.34 1903.66 1697.83 1280.88 914.81 1159.85
1968.42 447.34 1293.40 1769.69 0 639.77 1571.54 2534.72 1544.17 445.62 744.44 2088.42 2469.71 1331.46 1497.56 1186.37 1869.95 1076.82 1137.38 1740.39 296.68 1742.25 855.32 946.12
1498.79 316.41 689.06 1131.52 639.77 0 1011.31 1894.95 927.92 1064.76 894.29 1450.12 1975.38 788.56 1565.19 563.93 1247.61 443.26 811.11 1556.51 629.63 1316.59 216.98 545.29
1757.54 1327.24 354.03 766.67 1571.54 1011.31 0 1238.38 287.97 2017.17 1326.33 955.13 2071.75 1157.89 1558.52 838.00 1025.90 633.05 1529.69 1143.40 1635.54 521.68 868.87 667.80
1469.29 2145.39 1315.16 773.20 2534.72 1894.95 1238.38 0 1073.36 2950.11 2513.69 462.60 1449.96 1413.37 2792.41 1374.91 776.83 1465.61 1882.22 2314.19 2471.02 1626.56 1680.00 1823.72
1471.78 1229.93 254.51 489.76 1544.17 927.92 287.97 1073.36 0 1983.75 1440.34 720.12 1785.33 900.01 1779.93 610.17 744.63 492.25 1307.51 1414.16 1554.82 809.65 742.79 750.49
2230.42 809.48 1735.01 2178.85 445.62 1064.76 2017.17 2950.11 1983.75 0 1052.95 2496.39 2734.60 1669.43 1753.97 1582.16 2253.98 1507.55 1373.81 2099.29 502.61 2171.65 1273.88 1386.08
2391.06 976.02 1204.00 1836.20 744.44 894.29 1326.33 2513.69 1440.34 1052.95 0 2131.20 2859.32 1672.69 756.61 1391.36 2022.76 1138.61 1673.74 1051.39 1020.76 1265.79 1052.76 690.12
1137.67 1688.97 929.97 318.72 2088.42 1450.12 955.13 462.60 720.12 2496.39 2131.20 0 1263.37 957.91 2498.32 916.23 340.55 1034.57 1431.21 2093.69 2012.70 1431.07 1233.48 1445.85
504.64 2026.94 1867.69 1314.30 2469.71 1975.38 2071.75 1449.96 1785.33 2734.60 2859.32 1263.37 0 1187.73 3437.70 1484.53 1053.40 1773.73 1360.80 3183.43 2250.10 2591.53 1807.09 2288.42
725.12 885.32 840.72 696.61 1331.46 788.56 1157.89 1413.37 900.01 1669.43 1672.69 957.91 1187.73 0 2283.19 348.89 641.31 646.04 476.00 2122.15 1166.83 1650.12 623.36 1143.01
3006.93 1710.99 1607.99 2253.26 1497.56 1565.19 1558.52 2792.41 1779.93 1753.97 756.61 2498.32 3437.70 2283.19 0 1957.15 2484.92 1664.04 2374.26 632.59 1777.35 1227.38 1669.22 1149.41
1054.55 773.33 501.97 601.87 1186.37 563.93 838.00 1374.91 610.17 1582.16 1391.36 916.23 1484.53 348.89 1957.15 0 685.14 300.16 698.04 1773.83 1096.54 1311.80 354.42 809.02
831.59 1445.70 876.96 261.29 1869.95 1247.61 1025.90 776.83 744.63 2253.98 2022.76 340.55 1053.40 641.31 2484.92 685.14 0 885.38 1105.76 2157.99 1758.03 1541.83 1033.73 1365.91
1353.90 738.10 280.34 721.08 1076.82 443.26 633.05 1465.61 492.25 1507.55 1138.61 1034.57 1773.73 646.04 1664.04 300.16 885.38 0 922.00 1476.73 1064.43 1052.85 250.71 514.69
856.69 721.55 1181.67 1171.34 1137.38 811.11 1529.69 1882.22 1307.51 1373.81 1673.74 1431.21 1360.80 476.00 2374.26 698.04 1105.76 922.00 0 2339.22 894.06 1974.79 763.26 1316.24
2813.02 1797.75 1319.62 1903.66 1740.39 1556.51 1143.40 2314.19 1414.16 2099.29 1051.39 2093.69 3183.43 2122.15 632.59 1773.83 2157.99 1476.73 2339.22 0 1969.82 688.33 1577.56 1023.41
1745.55 329.46 1318.67 1697.83 296.68 629.63 1635.54 2471.02 1554.82 502.61 1020.76 2012.70 2250.10 1166.83 1777.35 1096.54 1758.03 1064.43 894.06 1969.82 0 1884.91 817.45 1076.99
2276.51 1620.96 810.38 1280.88 1742.25 1316.59 521.68 1626.56 809.65 2171.65 1265.79 1431.07 2591.53 1650.12 1227.38 1311.80 1541.83 1052.85 1974.79 688.33 1884.91 0 1241.90 808.14
1347.43 489.28 523.61 914.81 855.32 216.98 868.87 1680.00 742.79 1273.88 1052.76 1233.48 1807.09 623.36 1669.22 354.42 1033.73 250.71 763.26 1577.56 817.45 1241.90 0 557.43
1862.33 826.66 516.06 1159.85 946.12 545.29 667.80 1823.72 750.49 1386.08 690.12 1445.85 2288.42 1143.01 1149.41 809.02 1365.91 514.69 1316.24 1023.41 1076.99 808.14 557.43 0
194 changes: 194 additions & 0 deletions examples/DataframeLab.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "e237e057-796b-4e8a-89f8-076882ea2f9c",
"metadata": {},
"source": [
"# Using `rustic_ml` in a Jupyter notebook"
]
},
{
"cell_type": "markdown",
"id": "733b683a-a558-4596-96e2-6faca1e4c29a",
"metadata": {},
"source": [
"The first step is to include the crate in the notebook. \n",
"To get started see `README.md` on how to setup the notebook environment. \n",
"When it is installed, run `jupyter lab` to start the notebook in the browser.\n",
"Create a new notebook with the Rust option, and set the dependency to: \n",
"```rust\n",
":dep rustic_ml = \"0.x.x\"\n",
"extern crate rustic_ml;\n",
"```\n",
"\n",
"After this, you will be able to use the library's functionality in the following files.\n",
"\n",
"Since this is an example within the library itself, we import the library using the path:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "f541ad4f-9472-4cf6-a787-78a54389be91",
"metadata": {
"vscode": {
"languageId": "rust"
}
},
"outputs": [],
"source": [
":dep rustic_ml = { path = \"../\" }\n",
"extern crate rustic_ml;"
]
},
{
"cell_type": "markdown",
"id": "67ec5c36-a929-4ee0-92bb-58cdcc7d5a5b",
"metadata": {},
"source": [
"Next, include the function that we are going to use from the library: "
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "3f2c42e6-7cf4-45b5-bdfb-1228b2bce10d",
"metadata": {
"vscode": {
"languageId": "rust"
}
},
"outputs": [],
"source": [
"use rustic_ml::data_utils::dataframe::Dataframe;"
]
},
{
"cell_type": "markdown",
"id": "76a9d147-f8ce-4bd5-9f12-dee698b0a942",
"metadata": {},
"source": [
"Reading a csv file:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d5d58a4d-8e12-44b8-9974-6408333cefc4",
"metadata": {},
"outputs": [],
"source": [
"let path = String::from(\"../datasets/european_cities.csv\");\n",
"let dataframe = Dataframe::from_csv(path).unwrap();"
]
},
{
"cell_type": "markdown",
"id": "152858c3-707d-4563-8e69-5f2681797399",
"metadata": {},
"source": [
"Run the following codeblock to see the information about the dataframe:"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "11070ca7-ffcc-41bd-851e-e67c7fb89d48",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Column Name Type None Some Total Length \n",
"-----------------------------------------------------------------\n",
"Barcelona Float 0 24 24 \n",
"Belgrade Float 0 24 24 \n",
"Berlin Float 0 24 24 \n",
"Brussels Float 0 24 24 \n",
"Bucharest Float 0 24 24 \n",
"Budapest Float 0 24 24 \n",
"Copenhagen Float 0 24 24 \n",
"Dublin Float 0 24 24 \n",
"Hamburg Float 0 24 24 \n",
"Istanbul Float 0 24 24 \n",
"Kyiv Float 0 24 24 \n",
"London Float 0 24 24 \n",
"Madrid Float 0 24 24 \n",
"Milan Float 0 24 24 \n",
"Moscow Float 0 24 24 \n",
"Munich Float 0 24 24 \n",
"Paris Float 0 24 24 \n",
"Prague Float 0 24 24 \n",
"Rome Float 0 24 24 \n",
"Saint Petersburg Float 0 24 24 \n",
"Sofia Float 0 24 24 \n",
"Stockholm Float 0 24 24 \n",
"Vienna Float 0 24 24 \n",
"Warsaw Float 0 24 24 \n"
]
},
{
"data": {
"text/plain": [
"()"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataframe.info()"
]
},
{
"cell_type": "markdown",
"id": "546e0b63-3577-456f-8a1f-83d306f0e7af",
"metadata": {},
"source": [
"To see the memory usage of the dataframe, we call `memory_usage()`:"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "63ed09c7-6779-4fa0-a4c4-fb77d421f43a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4608"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataframe.memory_usage()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Rust",
"language": "rust",
"name": "rust"
},
"language_info": {
"codemirror_mode": "rust",
"file_extension": ".rs",
"mimetype": "text/rust",
"name": "Rust",
"pygment_lexer": "rust",
"version": ""
}
},
"nbformat": 4,
"nbformat_minor": 5
}
19 changes: 19 additions & 0 deletions examples/dataframe_read_from_csv.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
use rustic_ml::data_utils::dataframe::Dataframe;

/// Example entry point: loads the European cities table from CSV into a
/// `Dataframe`, prints its column summary, its memory usage, and its first
/// records.
///
/// # Panics
///
/// Panics if `Dataframe::from_csv` cannot load the file. The path is relative
/// to the current working directory, so the example must be started from the
/// repository root (the directory that contains `datasets/`).
fn main() {
    let path = String::from("./datasets/european_cities.csv");
    // Use `expect` rather than a bare `unwrap` so a failed load explains the
    // most likely cause (wrong working directory) instead of only dumping the
    // raw error value.
    let dataframe = Dataframe::from_csv(path).expect(
        "failed to load ./datasets/european_cities.csv; run this example from the repository root",
    );

    // Print the per-column info table
    dataframe.info();

    // We can also get the total number of bytes used
    let total_bytes_used = dataframe.memory_usage();
    println!(
        "\nMemory usage for the dataframe: {} bytes",
        total_bytes_used
    );

    // Let's print the first five records with the head method:
    dataframe.head();
}
Loading

0 comments on commit ef5bd91

Please sign in to comment.