From 5984babf5d9358dbfa4805d2e020baffb75c577b Mon Sep 17 00:00:00 2001 From: david-perez Date: Sun, 24 Jan 2021 15:11:16 +0100 Subject: [PATCH] Refactor and expose Item model type This commit heavily refactors the Item model type by including many (if not all) fields returned by the Pocket API when hitting the /v3/get endpoint [0]. The model includes all documented fields as well as other fields that I've encountered in my experiments. Those fields that are not always present have been typed as `Option`s. The commit also exposes fields publicly so that consuming applications can directly use them. [0]: https://getpocket.com/developer/docs/v3/retrieve --- Cargo.lock | 73 +++++++ Cargo.toml | 1 + src/bin/pickpocket-batch-favorite.rs | 2 +- src/bin/pickpocket-batch-read.rs | 2 +- src/bin/pickpocket-fixup.rs | 4 +- src/bin/pickpocket-from-csv.rs | 3 +- src/bin/pickpocket-inspect.rs | 2 +- src/lib.rs | 316 ++++++++++++++++++++++++--- 8 files changed, 361 insertions(+), 42 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3cebdc9..a796bfa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -157,6 +157,41 @@ dependencies = [ "sct", ] +[[package]] +name = "darling" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d706e75d87e35569db781a9b5e2416cff1236a47ed380831f959382ccd5f858" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0c960ae2da4de88a91b2d920c2a7233b400bc33cb28453a2987822d8392519b" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b5a2f4ac4969822c62224815d069952656cadc7084fdca9751e6d959189b72" +dependencies = [ + "darling_core", + "quote", + "syn", +] + [[package]] name = "flate2" version = 
"1.0.19" @@ -342,6 +377,12 @@ dependencies = [ "webpki", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "indexmap" version = "1.6.1" @@ -490,6 +531,7 @@ dependencies = [ "serde", "serde_derive", "serde_json", + "serde_with", "tokio", ] @@ -678,6 +720,9 @@ name = "serde" version = "1.0.119" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9bdd36f49e35b61d49efd8aa7fc068fd295961fd2286d0b2ee9a4c7a14e99cc3" +dependencies = [ + "serde_derive", +] [[package]] name = "serde_derive" @@ -701,6 +746,28 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_with" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15f6201e064705553ece353a736a64be975680bd244908cf63e8fa71e478a51a" +dependencies = [ + "serde", + "serde_with_macros", +] + +[[package]] +name = "serde_with_macros" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1197ff7de45494f290c1e3e1a6f80e108974681984c87a3e480991ef3d0f1950" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "slab" version = "0.4.2" @@ -724,6 +791,12 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" +[[package]] +name = "strsim" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6446ced80d6c486436db5c078dde11a9f73d42b57fb273121e160b84f63d894c" + [[package]] name = "syn" version = "1.0.58" diff --git a/Cargo.toml b/Cargo.toml index ea56821..7dc6e7d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,5 +13,6 @@ hyper-rustls = "0.22" serde = "1.0.119" serde_json = "1.0.61" serde_derive = "1.0.119" +serde_with = "1.6" tokio = { version = "1.0.2", features = 
["rt-multi-thread", "macros"] } csv = "1.1" diff --git a/src/bin/pickpocket-batch-favorite.rs b/src/bin/pickpocket-batch-favorite.rs index c3170d1..91d65dd 100644 --- a/src/bin/pickpocket-batch-favorite.rs +++ b/src/bin/pickpocket-batch-favorite.rs @@ -16,7 +16,7 @@ async fn main() { match app.get(&url as &str) { Some(id) => { let item = cache_reading_list.get(id).expect("cant locate id"); - if item.favorite() == FavoriteStatus::NotFavorited { + if item.favorite == FavoriteStatus::NotFavorited { ids.insert(id); } else { println!("Url {} already marked as favorite", url); diff --git a/src/bin/pickpocket-batch-read.rs b/src/bin/pickpocket-batch-read.rs index e0d9207..c9ce1b2 100644 --- a/src/bin/pickpocket-batch-read.rs +++ b/src/bin/pickpocket-batch-read.rs @@ -16,7 +16,7 @@ async fn main() { match app.get(&url as &str) { Some(id) => { let item = cache_reading_list.get(id).expect("cant locate id"); - if item.status() == Status::Unread { + if item.status == Status::Unread { ids.insert(id); } else { println!("Url {} already marked as read", url); diff --git a/src/bin/pickpocket-fixup.rs b/src/bin/pickpocket-fixup.rs index 1aa1769..eb85efb 100644 --- a/src/bin/pickpocket-fixup.rs +++ b/src/bin/pickpocket-fixup.rs @@ -13,11 +13,11 @@ async fn main() { let mut read: BTreeSet<&str> = BTreeSet::new(); for (id, reading_item) in &reading_list { - if reading_item.favorite() == FavoriteStatus::Favorited { + if reading_item.favorite == FavoriteStatus::Favorited { favorites.insert(id); } - if reading_item.status() == Status::Read { + if reading_item.status == Status::Read { read.insert(id); } } diff --git a/src/bin/pickpocket-from-csv.rs b/src/bin/pickpocket-from-csv.rs index d699044..7c945f1 100644 --- a/src/bin/pickpocket-from-csv.rs +++ b/src/bin/pickpocket-from-csv.rs @@ -41,8 +41,7 @@ async fn main() { } Some(id) => { let pocket_item = cache_reading_list.get(id).expect("cant locate id"); - if pocket_item.status() == Status::Unread - && (folder == "Archive" || folder == 
"Done") + if pocket_item.status == Status::Unread && (folder == "Archive" || folder == "Done") { read_ids.insert(id); } diff --git a/src/bin/pickpocket-inspect.rs b/src/bin/pickpocket-inspect.rs index 88eee89..7d4f700 100644 --- a/src/bin/pickpocket-inspect.rs +++ b/src/bin/pickpocket-inspect.rs @@ -16,7 +16,7 @@ fn main() { url = reading_item.url(), clean = pickpocket::cleanup_url(reading_item.url()), title = reading_item.title(), - status = reading_item.status() + status = reading_item.status ); } } diff --git a/src/lib.rs b/src/lib.rs index 102bb81..52a8a48 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,8 @@ use hyper::{body, header, Body, Method, Request, Uri}; +use serde::de::{self, Deserialize, Deserializer, Unexpected}; +use serde::ser::Serializer; use serde_derive::{Deserialize, Serialize}; +use serde_with::{serde_as, DisplayFromStr}; use std::collections::BTreeMap; use std::fmt::{Display, Formatter, Result}; @@ -10,14 +13,244 @@ pub use auth::*; const DEFAULT_COUNT: u32 = 5000; +/// A Pocket item. +/// The official API docs state that all members are optional. However, empirically it seems safe +/// to assume that the ones that are not `Option`s are always present. +#[serde_as] #[derive(Serialize, Deserialize, Debug)] pub struct Item { - given_url: String, - resolved_url: Option, - given_title: String, - resolved_title: Option, - favorite: String, - status: String, + /// A unique identifier matching the saved item. This id must be used to perform any actions + /// through the v3/modify endpoint. + pub item_id: String, + + /// A unique identifier similar to the item_id but is unique to the actual url of the saved + /// item. The resolved_id identifies unique urls. For example a direct link to a New York Times + /// article and a link that redirects (ex a shortened bit.ly url) to the same article will + /// share the same resolved_id. If this value is 0, it means that Pocket has not processed the + /// item. 
Normally this happens within seconds but is possible you may request the item before + /// it has been resolved. + pub resolved_id: String, + + /// The actual url that was saved with the item. This url should be used if the user wants to + /// view the item. + pub given_url: String, + + /// The final url of the item. For example if the item was a shortened bit.ly link, this will + /// be the actual article the url linked to. + pub resolved_url: String, + + /// The title that was saved along with the item. + pub given_title: String, + + /// The title that Pocket found for the item when it was parsed. + pub resolved_title: String, + + /// Whether the item is favorited or not. + pub favorite: FavoriteStatus, + + /// Whether the item is unread or read (i.e. in the "Archive"). + pub status: Status, + + /// The first few lines of the item (articles only). + pub excerpt: String, + + /// Whether the item is an article or not. + #[serde(deserialize_with = "deserialize_string_to_bool")] + #[serde(serialize_with = "serialize_bool_to_string")] + pub is_article: bool, + + /// Whether the item has/is an image. + pub has_image: HasImage, + + /// Whether the item has/is a video. + pub has_video: HasVideo, + + /// How many words are in the article. + #[serde_as(as = "DisplayFromStr")] + pub word_count: u64, + + // The following are not documented in the official API docs, but they are present in the + // responses. The ones marked as Option are *sometimes* present in the responses. Use at your + // own risk. + /// UNIX timestamp when the item was added. + #[serde_as(as = "DisplayFromStr")] + pub time_added: u64, + #[serde_as(as = "DisplayFromStr")] + pub time_updated: u64, + + /// UNIX timestamp when the item was read (i.e. moved to the "Archive"). Set to 0 if the item + /// has not been read. + #[serde_as(as = "DisplayFromStr")] + pub time_read: u64, + + /// UNIX timestamp when the item was favorited. Set to 0 if the item has not been favorited. 
+ #[serde_as(as = "DisplayFromStr")] + pub time_favorited: u64, + + pub sort_id: u32, + + #[serde(deserialize_with = "deserialize_string_to_bool")] + #[serde(serialize_with = "serialize_bool_to_string")] + pub is_index: bool, + + /// Language code. This is sometimes set to an empty string. + pub lang: String, + + pub top_image_url: Option, + pub domain_metadata: Option, + pub listen_duration_estimate: u64, + pub time_to_read: Option, + pub amp_url: Option, + + // The following fields are documented in the official API docs and only present when + // detailType=complete. + pub images: Option>, + pub videos: Option>, + pub authors: Option>, + pub tags: Option>, + + // The following are not documented in the official API docs, but they are present in responses + // when detailType=complete. + pub image: Option, +} + +fn deserialize_string_to_bool<'de, D>(deserializer: D) -> std::result::Result +where + D: Deserializer<'de>, +{ + match String::deserialize(deserializer)?.as_ref() { + "0" => Ok(false), + "1" => Ok(true), + other => Err(de::Error::invalid_value(Unexpected::Str(other), &"0 or 1")), + } +} + +fn serialize_bool_to_string(b: &bool, serializer: S) -> std::result::Result +where + S: Serializer, +{ + serializer.serialize_str(if *b { "1" } else { "0" }) +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct DomainMetadata { + pub name: Option, + pub logo: String, + pub greyscale_logo: String, +} + +/// The main image associated with an `Item`. +/// Same as an `Image`, except the `image_id`, `credit`, and `caption` fields are not present. +#[serde_as] +#[derive(Serialize, Deserialize, Debug)] +pub struct MainImage { + /// The `Item`'s `item_id` this image is associated with. + pub item_id: String, + + /// A URL where the image is found. + pub src: String, + + /// Image width. + #[serde_as(as = "DisplayFromStr")] + pub width: u32, + + /// Image height. + #[serde_as(as = "DisplayFromStr")] + pub height: u32, +} + +/// An image associated with an `Item`. 
+#[serde_as] +#[derive(Serialize, Deserialize, Debug)] +pub struct Image { + /// The `Item`'s `item_id` this image is associated with. + pub item_id: String, + + /// An id for the image. An incremental integer. + pub image_id: String, + + /// A URL where the image is found. + pub src: String, + + /// Image width. Caution: often set to zero. + #[serde_as(as = "DisplayFromStr")] + pub width: u32, + + /// Image height. Caution: often set to zero. + #[serde_as(as = "DisplayFromStr")] + pub height: u32, + + /// Image attribution. Caution: often set to an empty string. + // TODO This field is set to an empty string instead of removed from the response. Change the + // model to have it be of type Option. + pub credit: String, + + /// Image caption. Caution: often set to an empty string. + // TODO This field is set to an empty string instead of removed from the response. Change the + // model to have it be of type Option. + pub caption: String, +} + +#[serde_as] +#[derive(Serialize, Deserialize, Debug)] +pub struct Video { + /// The `Item`'s `item_id` this video is associated with. + pub item_id: String, + + /// An id for the video. An incremental integer. + pub video_id: String, + + /// A URL where the video is found. + pub src: String, + + /// Image width. Caution: often set to zero. + #[serde_as(as = "DisplayFromStr")] + pub width: u32, + + /// Image height. Caution: often set to zero. + #[serde_as(as = "DisplayFromStr")] + pub height: u32, + + // TODO What is this? It seems to be set to 1, 2, 4, 5 or 7. + #[serde_as(as = "DisplayFromStr")] + #[serde(rename = "type")] + video_type: u32, + + /// Seems to be set to YouTube/Vimeo video id. Caution: often set to an empty string. + // TODO This field is set to an empty string instead of removed from the response. Change the + // model to have it be of type Option. + pub vid: String, + + /// Video length in seconds. Caution: often set to Some(0). 
+ #[serde_as(as = "Option")] + pub length: Option, +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct Author { + /// The `Item`'s `item_id` this author is associated with. + pub item_id: String, + + /// An id for the author. + pub author_id: String, + + /// Author's name. + pub name: String, + + /// Author's URL. This may be the author's profile page in blogging platforms like e.g. Medium + /// or social networks like Facebook/Google+. Caution: can be an empty string. + // TODO This field is set to an empty string instead of removed from the response. Change the + // model to have it be of type Option. + pub url: String, +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct Tag { + /// The `Item`'s `item_id` this tag is applied to. + pub item_id: String, + + /// Tag name. + pub tag: String, } pub type ReadingList = BTreeMap; @@ -39,16 +272,42 @@ enum Action { Add, } -#[derive(PartialEq)] +#[derive(Serialize, Deserialize, Clone, PartialEq, Debug)] pub enum FavoriteStatus { - Favorited, + #[serde(rename = "0")] NotFavorited, + #[serde(rename = "1")] + Favorited, } -#[derive(PartialEq)] +#[derive(Serialize, Deserialize, Clone, PartialEq, Debug)] pub enum Status { - Read, + #[serde(rename = "0")] Unread, + #[serde(rename = "1")] + Read, + #[serde(rename = "2")] + ShouldBeDeleted, +} + +#[derive(Serialize, Deserialize, Clone, PartialEq, Debug)] +pub enum HasImage { + #[serde(rename = "0")] + No, + #[serde(rename = "1")] + Yes, + #[serde(rename = "2")] + IsImage, +} + +#[derive(Serialize, Deserialize, Clone, PartialEq, Debug)] +pub enum HasVideo { + #[serde(rename = "0")] + No, + #[serde(rename = "1")] + Yes, + #[serde(rename = "2")] + IsVideo, } impl Display for Status { @@ -59,6 +318,7 @@ impl Display for Status { match *self { Status::Read => "Read", Status::Unread => "Unread", + Status::ShouldBeDeleted => "ShouldBeDeleted", } ) } @@ -66,36 +326,22 @@ impl Display for Status { impl Item { pub fn url(&self) -> &str { - if let Some(resolved) = 
&self.resolved_url { - if !resolved.is_empty() { - return resolved; - } - } - &self.given_url - } - - pub fn title(&self) -> &str { - let title = self.resolved_title.as_ref().unwrap_or(&self.given_title); - if title.is_empty() { - self.url() + if !self.resolved_url.is_empty() { + &self.resolved_url } else { - title + &self.given_url } } - pub fn favorite(&self) -> FavoriteStatus { - if &self.favorite == "1" { - FavoriteStatus::Favorited - } else { - FavoriteStatus::NotFavorited - } - } - - pub fn status(&self) -> Status { - if &self.status == "1" { - Status::Read + pub fn title(&self) -> &str { + if self.resolved_title.is_empty() { + if self.given_title.is_empty() { + self.url() + } else { + &self.given_title + } } else { - Status::Unread + &self.resolved_title } } }