diff --git a/backend/Cargo.lock b/backend/Cargo.lock index 3848171..6b08d73 100644 --- a/backend/Cargo.lock +++ b/backend/Cargo.lock @@ -122,6 +122,7 @@ dependencies = [ "once_cell", "reqwest", "rocket", + "scraper", "serde", "sqlx", ] @@ -345,6 +346,29 @@ dependencies = [ "typenum", ] +[[package]] +name = "cssparser" +version = "0.31.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b3df4f93e5fbbe73ec01ec8d3f68bba73107993a5b1e7519273c32db9b0d5be" +dependencies = [ + "cssparser-macros", + "dtoa-short", + "itoa", + "phf 0.11.2", + "smallvec", +] + +[[package]] +name = "cssparser-macros" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" +dependencies = [ + "quote", + "syn 2.0.29", +] + [[package]] name = "der" version = "0.7.8" @@ -362,6 +386,17 @@ version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2696e8a945f658fd14dc3b87242e6b80cd0f36ff04ea560fa39082368847946" +[[package]] +name = "derive_more" +version = "0.99.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "devise" version = "0.4.1" @@ -413,6 +448,27 @@ version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" +[[package]] +name = "dtoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcbb2bf8e87535c23f7a8a321e364ce21462d0ff10cb6407820e8e96dfff6653" + +[[package]] +name = "dtoa-short" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbaceec3c6e4211c79e7b1800fb9680527106beb2f9c51904a3210c03a448c74" +dependencies = [ + "dtoa", +] + +[[package]] +name = "ego-tree" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a68a4904193147e0a8dec3314640e6db742afd5f6e634f428a6af230d9b3591" + [[package]] name = "either" version = "1.9.0" @@ -537,6 +593,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" +dependencies = [ + "mac", + "new_debug_unreachable", +] + [[package]] name = "futures" version = "0.3.28" @@ -624,6 +690,15 @@ dependencies = [ "slab", ] +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + [[package]] name = "generator" version = "0.7.5" @@ -647,6 +722,15 @@ dependencies = [ "version_check", ] +[[package]] +name = "getopts" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" +dependencies = [ + "unicode-width", +] + [[package]] name = "getrandom" version = "0.2.10" @@ -762,6 +846,20 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "html5ever" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7" +dependencies = [ + "log", + "mac", + "markup5ever", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "http" version = "0.2.9" @@ -1024,6 +1122,26 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + +[[package]] +name = "markup5ever" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016" +dependencies = [ + "log", + "phf 0.10.1", + "phf_codegen", + "string_cache", + "string_cache_codegen", + "tendril", +] + [[package]] name = "matchers" version = "0.1.0" @@ -1124,6 +1242,12 @@ dependencies = [ "tempfile", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" + [[package]] name = "nom" version = "7.1.3" @@ -1334,6 +1458,86 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" +[[package]] +name = "phf" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259" +dependencies = [ + "phf_shared 0.10.0", +] + +[[package]] +name = "phf" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +dependencies = [ + "phf_macros", + "phf_shared 0.11.2", +] + +[[package]] +name = "phf_codegen" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd" +dependencies = [ + "phf_generator 0.10.0", + "phf_shared 0.10.0", +] + +[[package]] +name = "phf_generator" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" +dependencies = [ + "phf_shared 0.10.0", + "rand", +] + +[[package]] +name = "phf_generator" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +dependencies = [ + "phf_shared 0.11.2", + "rand", +] + +[[package]] +name = "phf_macros" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b" +dependencies = [ + "phf_generator 0.11.2", + "phf_shared 0.11.2", + "proc-macro2", + "quote", + "syn 2.0.29", +] + +[[package]] +name = "phf_shared" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +dependencies = [ + "siphasher", +] + +[[package]] +name = "phf_shared" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project" version = "1.1.3" @@ -1399,6 +1603,12 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "proc-macro2" version = "1.0.66" @@ -1814,6 +2024,23 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "scraper" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c95a930e03325234c18c7071fd2b60118307e025d6fff3e12745ffbf63a3d29c" +dependencies = [ + "ahash", + "cssparser", + "ego-tree", + "getopts", + "html5ever", + "once_cell", + "selectors", + "smallvec", + "tendril", +] + [[package]] name = "sct" version = "0.7.0" @@ -1847,6 +2074,25 @@ dependencies = [ "libc", ] +[[package]] +name = "selectors" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eb30575f3638fc8f6815f448d50cb1a2e255b0897985c8c59f4d37b72a07b06" +dependencies = [ + "bitflags 2.4.0", + "cssparser", + "derive_more", + "fxhash", + "log", + "new_debug_unreachable", + "phf 0.10.1", + "phf_codegen", + "precomputed-hash", + "servo_arc", + "smallvec", +] + [[package]] name = "serde" version = "1.0.188" @@ -1899,6 +2145,15 @@ dependencies = [ "serde", ] +[[package]] +name = "servo_arc" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d036d71a959e00c77a63538b90a6c2390969f9772b096ea837205c6bd0491a44" +dependencies = [ + "stable_deref_trait", +] + [[package]] name = "sha1" version = "0.10.5" @@ -1949,6 +2204,12 @@ dependencies = [ "rand_core", ] +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + [[package]] name = "slab" version = "0.4.9" @@ -2231,6 +2492,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "state" version = "0.5.3" @@ -2240,6 +2507,32 @@ dependencies = [ "loom", ] +[[package]] +name = "string_cache" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b" +dependencies = [ + "new_debug_unreachable", + "once_cell", + "parking_lot", + "phf_shared 0.10.0", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988" +dependencies = [ + "phf_generator 0.10.0", + "phf_shared 0.10.0", + "proc-macro2", + "quote", +] + [[package]] name = "stringprep" version = "0.1.3" @@ -2291,6 +2584,17 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "tendril" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" +dependencies = [ + "futf", + "mac", + "utf-8", +] + [[package]] name = "thiserror" version = "1.0.47" @@ -2600,6 +2904,12 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" +[[package]] +name = "unicode-width" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" + [[package]] name = "unicode-xid" version = "0.2.4" @@ -2629,6 +2939,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + [[package]] name = "uuid" version = "1.4.1" diff --git a/backend/Cargo.toml b/backend/Cargo.toml index 931aa50..2ef0663 100644 --- a/backend/Cargo.toml +++ b/backend/Cargo.toml @@ -13,3 +13,4 @@ once_cell = "1.18.0" dotenvy = "0.15.7" serde = "1.0.188" chrono = "0.4.27" +scraper = "0.17.1" diff --git a/backend/src/model/person.rs b/backend/src/model/person.rs index a455550..3f89e6f 100644 --- a/backend/src/model/person.rs +++ b/backend/src/model/person.rs @@ -23,7 +23,7 @@ pub struct Person { /// Example: `Gomez` pub person_maternal_surname: String, /// Id of the online classroom user id linked to this user - pub person_classroom_id: Option + pub person_classroom_id: Option, } impl Person { diff --git a/backend/src/online_classroom/users.rs b/backend/src/online_classroom/users.rs index f40f8fb..cc7aa69 100644 --- a/backend/src/online_classroom/users.rs +++ b/backend/src/online_classroom/users.rs @@ -1,6 +1,15 @@ -use rocket::{http::Status, serde::json::Json}; - use super::{json_result::JsonResult, session::request}; +use rocket::{http::Status, serde::json::Json}; +use scraper::{ElementRef, Html, Selector}; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Serialize, Deserialize)] +pub struct ClassroomPerson { + name: String, + surname: String, + username: String, + user_id: String, +} // Instead of requesting pages and managing session & cookies manually, // create a wrapper that: @@ -10,16 +19,111 @@ use super::{json_result::JsonResult, session::request}; // - Returns the html string, or an error #[get("/classroom/users/")] -pub async fn get_users(full_name: String) -> (Status, Json>) { - let html = request(format!("/main/admin/user_list.php?keyword={}&submit=&_qf__search_simple=", full_name)).await; +pub async fn get_users(full_name: String) -> (Status, Json>>) { + let html = request(format!( + "/main/admin/user_list.php?keyword={}&submit=&_qf__search_simple=", + full_name + )) + .await; match html { - Ok(html) => { - println!("{}", html); - (Status::Ok, JsonResult::ok(())) - } - Err(reason) => { - (Status::InternalServerError, JsonResult::err(reason)) - } + Ok(html) => match parse_users(&html) { + Ok(users) => (Status::Ok, JsonResult::ok(users)), + Err(reason) => { + // println!("{}", html); + (Status::InternalServerError, JsonResult::err(reason)) + } + }, + Err(reason) => (Status::InternalServerError, JsonResult::err(reason)), } } + +fn parse_users(file: &str) -> Result, String> { + // Selectors + let Ok(form_selector) = Selector::parse("form#form_users_id") else { + return Err("Error parsing form#form_users_id selector".into()); + }; + let Ok(tr_selector) = Selector::parse("tr:not(:first-child)") else { + return Err("Error parsing tr:not(:first-child) selector".into()); + }; + let Ok(td_selector) = Selector::parse("td") else { + return Err("Error parsing td selector".into()); + }; + + let fragment = Html::parse_document(file); + + let form_element = match fragment.select(&form_selector).next() { + Some(el) => el, + None => return Err("Error selecting form#form_users_id: not found".into()), + }; + + let mut result_vec = Vec::new(); + for element in form_element.select(&tr_selector) { + let td_vec: Vec<_> = element.select(&td_selector).collect(); + + if td_vec.len() != 12 { + return Err(format!( + "Error parsing tr: td elements count is not 12, but {}", + td_vec.len() + )); + } + + result_vec.push(get_person_data(&td_vec)?); + } + + Ok(result_vec) +} + +fn get_person_data(td_vec: &Vec) -> Result { + // Surnames + let surname_ref = td_vec[3]; + let name_ref = td_vec[4]; + let username_ref = td_vec[5]; + + // Selectors + let a_selector = Selector::parse("a").expect("Error parsing `a` selector"); + + // + // Get the href of the surname link + // + let surnames_a_node = surname_ref + .first_child() + .ok_or("Expected the 3rd td element to have a children")?; + + let surnames_a_element = surnames_a_node + .value() + .as_element() + .ok_or("Expected the 3rd td element to have an html children")?; + + let href_value = surnames_a_element + .attr("href") + .ok_or("Expected the 3rd td element's children to have an href attribute")?; + + // Get the surname + let Some(surname_a_element) = surname_ref.select(&a_selector).next() else { + return Err("Expected the 3rd td element to have an `a` element".into()); + }; + let surname = surname_a_element.inner_html(); + + // Get the name + let Some(surnames_a_element) = name_ref.select(&a_selector).next() else { + return Err("Expected the 4th td element to have an `a` element".into()); + }; + let name = surnames_a_element.inner_html(); + + // Get the username + let username = username_ref.inner_html(); + + // Parse userid from href + // format: https://testing.aulavirtual.eegsac.com/main/admin/user_information.php?user_id=1087 + // Get the position of 'user_id=' + let user_id_start = href_value.find("user_id=").ok_or("Error parsing user_id")? + 8; + let user_id = href_value[user_id_start..].to_string(); + + Ok(ClassroomPerson { + name, + surname, + username, + user_id, + }) +}