Compare commits

..

No commits in common. "3d12877e2718103aedde5d8e47d2fa28901d10da" and "f288527ea44be2258ea1b46d04e32117614ef4c0" have entirely different histories.

5 changed files with 32 additions and 402 deletions

244
Cargo.lock generated
View file

@ -17,15 +17,6 @@ version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
[[package]]
name = "aho-corasick"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
"memchr",
]
[[package]] [[package]]
name = "allocator-api2" name = "allocator-api2"
version = "0.2.21" version = "0.2.21"
@ -106,15 +97,6 @@ dependencies = [
"num-traits", "num-traits",
] ]
[[package]]
name = "atomic"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d818003e740b63afc82337e3160717f4f63078720a810b7b903e70a5d1d2994"
dependencies = [
"bytemuck",
]
[[package]] [[package]]
name = "autocfg" name = "autocfg"
version = "1.4.0" version = "1.4.0"
@ -172,12 +154,6 @@ version = "3.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf"
[[package]]
name = "bytemuck"
version = "1.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9134a6ef01ce4b366b50689c94f82c14bc72bc5d0386829828a2e2752ef7958c"
[[package]] [[package]]
name = "byteorder" name = "byteorder"
version = "1.5.0" version = "1.5.0"
@ -450,19 +426,6 @@ version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "figment"
version = "0.10.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8cb01cd46b0cf372153850f4c6c272d9cbea2da513e07538405148f95bd789f3"
dependencies = [
"atomic",
"pear",
"serde",
"uncased",
"version_check",
]
[[package]] [[package]]
name = "flume" name = "flume"
version = "0.11.1" version = "0.11.1"
@ -832,12 +795,6 @@ dependencies = [
"hashbrown", "hashbrown",
] ]
[[package]]
name = "inlinable_string"
version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb"
[[package]] [[package]]
name = "is_terminal_polyfill" name = "is_terminal_polyfill"
version = "1.70.1" version = "1.70.1"
@ -930,15 +887,6 @@ version = "0.4.27"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
[[package]]
name = "matchers"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558"
dependencies = [
"regex-automata 0.1.10",
]
[[package]] [[package]]
name = "md-5" name = "md-5"
version = "0.10.6" version = "0.10.6"
@ -975,16 +923,6 @@ dependencies = [
"windows-sys 0.52.0", "windows-sys 0.52.0",
] ]
[[package]]
name = "nu-ansi-term"
version = "0.46.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
dependencies = [
"overload",
"winapi",
]
[[package]] [[package]]
name = "num-bigint-dig" name = "num-bigint-dig"
version = "0.8.4" version = "0.8.4"
@ -1053,12 +991,6 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
[[package]]
name = "overload"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]] [[package]]
name = "parking" name = "parking"
version = "2.2.1" version = "2.2.1"
@ -1088,29 +1020,6 @@ dependencies = [
"windows-targets 0.52.6", "windows-targets 0.52.6",
] ]
[[package]]
name = "pear"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bdeeaa00ce488657faba8ebf44ab9361f9365a97bd39ffb8a60663f57ff4b467"
dependencies = [
"inlinable_string",
"pear_codegen",
"yansi",
]
[[package]]
name = "pear_codegen"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4bab5b985dc082b345f812b7df84e1bef27e7207b39e448439ba8bd69c93f147"
dependencies = [
"proc-macro2",
"proc-macro2-diagnostics",
"quote",
"syn",
]
[[package]] [[package]]
name = "pem-rfc7468" name = "pem-rfc7468"
version = "0.7.0" version = "0.7.0"
@ -1183,19 +1092,6 @@ dependencies = [
"unicode-ident", "unicode-ident",
] ]
[[package]]
name = "proc-macro2-diagnostics"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8"
dependencies = [
"proc-macro2",
"quote",
"syn",
"version_check",
"yansi",
]
[[package]] [[package]]
name = "quote" name = "quote"
version = "1.0.40" version = "1.0.40"
@ -1248,15 +1144,11 @@ dependencies = [
"chrono", "chrono",
"clap", "clap",
"directories", "directories",
"figment",
"serde", "serde",
"serde_json", "serde_json",
"sqlx", "sqlx",
"thiserror", "thiserror",
"tokio", "tokio",
"tracing",
"tracing-core",
"tracing-subscriber",
] ]
[[package]] [[package]]
@ -1279,50 +1171,6 @@ dependencies = [
"thiserror", "thiserror",
] ]
[[package]]
name = "regex"
version = "1.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata 0.4.9",
"regex-syntax 0.8.5",
]
[[package]]
name = "regex-automata"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
dependencies = [
"regex-syntax 0.6.29",
]
[[package]]
name = "regex-automata"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax 0.8.5",
]
[[package]]
name = "regex-syntax"
version = "0.6.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
[[package]]
name = "regex-syntax"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
[[package]] [[package]]
name = "rsa" name = "rsa"
version = "0.9.8" version = "0.9.8"
@ -1446,15 +1294,6 @@ dependencies = [
"digest", "digest",
] ]
[[package]]
name = "sharded-slab"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6"
dependencies = [
"lazy_static",
]
[[package]] [[package]]
name = "shlex" name = "shlex"
version = "1.3.0" version = "1.3.0"
@ -1795,16 +1634,6 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "thread_local"
version = "1.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c"
dependencies = [
"cfg-if",
"once_cell",
]
[[package]] [[package]]
name = "tinystr" name = "tinystr"
version = "0.7.6" version = "0.7.6"
@ -1898,36 +1727,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c"
dependencies = [ dependencies = [
"once_cell", "once_cell",
"valuable",
]
[[package]]
name = "tracing-log"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3"
dependencies = [
"log",
"once_cell",
"tracing-core",
]
[[package]]
name = "tracing-subscriber"
version = "0.3.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008"
dependencies = [
"matchers",
"nu-ansi-term",
"once_cell",
"regex",
"sharded-slab",
"smallvec",
"thread_local",
"tracing",
"tracing-core",
"tracing-log",
] ]
[[package]] [[package]]
@ -1936,15 +1735,6 @@ version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f"
[[package]]
name = "uncased"
version = "0.9.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1b88fcfe09e89d3866a5c11019378088af2d24c3fbd4f0543f96b479ec90697"
dependencies = [
"version_check",
]
[[package]] [[package]]
name = "unicode-bidi" name = "unicode-bidi"
version = "0.3.18" version = "0.3.18"
@ -2001,12 +1791,6 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "valuable"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65"
[[package]] [[package]]
name = "vcpkg" name = "vcpkg"
version = "0.2.15" version = "0.2.15"
@ -2108,28 +1892,6 @@ dependencies = [
"wasite", "wasite",
] ]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]] [[package]]
name = "windows-core" name = "windows-core"
version = "0.61.0" version = "0.61.0"
@ -2358,12 +2120,6 @@ version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51"
[[package]]
name = "yansi"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049"
[[package]] [[package]]
name = "yoke" name = "yoke"
version = "0.7.5" version = "0.7.5"

View file

@ -12,7 +12,3 @@ clap = { version = "4.5.37", features = ["derive"] }
serde = { version = "1.0.219", features = ["derive"] } serde = { version = "1.0.219", features = ["derive"] }
chrono = {version = "0.4.41", features = ["serde"]} chrono = {version = "0.4.41", features = ["serde"]}
serde_json = "1.0.140" serde_json = "1.0.140"
tracing = "0.1.41"
tracing-subscriber = { version = "0.3.19" , features = ["env-filter"]}
figment = { version = "0.10.19", features = ["env"] }
tracing-core = "0.1.33"

View file

@ -1,116 +1,13 @@
use clap::Parser;
use serde::{Deserialize, Serialize};
use std::path::PathBuf; use std::path::PathBuf;
use tracing_core::{Level, LevelFilter}; use clap::Parser;
pub enum VerbosityFilter { #[derive(Debug, Parser)]
Off, pub struct Args {
Error,
Warn,
Info,
Debug,
Trace,
}
impl VerbosityFilter {
fn with_offset(&self, offset: i16) -> VerbosityFilter {
let value = match self {
Self::Off => 0_i16,
Self::Error => 1,
Self::Warn => 2,
Self::Info => 3,
Self::Debug => 4,
Self::Trace => 5,
};
match value.saturating_add(offset) {
i16::MIN..=0 => Self::Off,
1 => Self::Error,
2 => Self::Warn,
3 => Self::Info,
4 => Self::Debug,
5..=i16::MAX => Self::Trace,
}
}
}
impl From<LevelFilter> for VerbosityFilter {
fn from(level: LevelFilter) -> Self {
match level {
LevelFilter::OFF => Self::Off,
LevelFilter::ERROR => Self::Error,
LevelFilter::WARN => Self::Warn,
LevelFilter::INFO => Self::Info,
LevelFilter::DEBUG => Self::Debug,
LevelFilter::TRACE => Self::Trace,
}
}
}
impl From<VerbosityFilter> for Option<Level> {
fn from(filter: VerbosityFilter) -> Self {
match filter {
VerbosityFilter::Off => None,
VerbosityFilter::Error => Some(Level::ERROR),
VerbosityFilter::Warn => Some(Level::WARN),
VerbosityFilter::Info => Some(Level::INFO),
VerbosityFilter::Debug => Some(Level::DEBUG),
VerbosityFilter::Trace => Some(Level::TRACE),
}
}
}
impl From<Option<Level>> for VerbosityFilter {
fn from(level: Option<Level>) -> Self {
match level {
None => Self::Off,
Some(Level::ERROR) => Self::Error,
Some(Level::WARN) => Self::Warn,
Some(Level::INFO) => Self::Info,
Some(Level::DEBUG) => Self::Debug,
Some(Level::TRACE) => Self::Trace,
}
}
}
#[derive(Debug, Parser, Serialize, Deserialize)]
pub struct Config {
path: PathBuf, path: PathBuf,
#[arg(
long,
short = 'v',
action = clap::ArgAction::Count,
global = true,
help = "Increase logging verbosity",
)]
verbose: u8,
#[arg(
long,
short = 'q',
action = clap::ArgAction::Count,
global = true,
help = "Decrease logging verbosity",
conflicts_with = "verbose",
)]
quiet: u8,
} }
impl Config { impl Args {
pub fn path(&self) -> &PathBuf { pub fn path(&self) -> &PathBuf {
&self.path &self.path
} }
/// Gets the filter that should be applied to the logger.
///
pub fn filter(&self) -> VerbosityFilter {
let offset = self.verbose as i16 - self.quiet as i16;
VerbosityFilter::Error.with_offset(offset)
}
pub fn tracing_level(&self) -> Option<tracing_core::Level> {
self.filter().into()
}
/// Get the tracing level filter.
pub fn tracing_level_filter(&self) -> tracing_core::LevelFilter {
self.filter().into()
}
} }

View file

@ -1,29 +1,19 @@
use clap::Parser;
use readwise_bulk_upload::config::Config;
use readwise_bulk_upload::readwise::DocumentPayload;
use readwise_bulk_upload::sql::TaskManager;
use readwise_bulk_upload::{Error, Result};
use std::fs::File; use std::fs::File;
use tracing_subscriber; use clap::Parser;
use figment::{Figment, providers::{Serialized, Env, Format}}; use readwise_bulk_upload::config::Args;
use readwise_bulk_upload::readwise::DocumentPayload;
use readwise_bulk_upload::sql::{TaskManager};
use readwise_bulk_upload::{Error, Result};
#[tokio::main] #[tokio::main]
async fn main() -> Result<()> { async fn main() -> Result<()> {
let args: Config = Figment::new() let args = Args::parse();
.merge(Serialized::defaults(Config::parse()))
.merge(Env::prefixed("APP_"))
.extract()?;
tracing_subscriber::fmt() let file = File::open(args.path())
.with_max_level(args.verbose) .map_err(|_| Error::Runtime(format!(
.init();
let file = File::open(args.path()).map_err(|_| {
Error::Runtime(format!(
r#"The file "{}" could not be open"#, r#"The file "{}" could not be open"#,
args.path().display() args.path().display()
)) )))?;
})?;
let documents: Vec<DocumentPayload> = serde_json::from_reader(file)?; let documents: Vec<DocumentPayload> = serde_json::from_reader(file)?;

View file

@ -4,9 +4,8 @@ use serde::Serialize;
use sqlx::sqlite::{SqliteConnectOptions, SqliteJournalMode}; use sqlx::sqlite::{SqliteConnectOptions, SqliteJournalMode};
use sqlx::{QueryBuilder, Sqlite, SqlitePool}; use sqlx::{QueryBuilder, Sqlite, SqlitePool};
use tokio::fs; use tokio::fs;
use tracing::{info, instrument};
static SQLITE_BIND_LIMIT: usize = 32766; static SQLITE_QUERY_LIMIT: usize = 32766;
#[derive(sqlx::Type)] #[derive(sqlx::Type)]
#[repr(u8)] #[repr(u8)]
@ -21,7 +20,6 @@ pub trait TaskPayload {
fn get_key(&self) -> String; fn get_key(&self) -> String;
} }
#[derive(Debug)]
pub struct TaskManager { pub struct TaskManager {
pool: SqlitePool, pool: SqlitePool,
} }
@ -53,12 +51,10 @@ impl TaskManager {
Ok(pool) Ok(pool)
} }
#[instrument(skip(self, values))]
pub async fn load_tasks<T>(&self, values: Vec<T>) -> crate::Result<()> pub async fn load_tasks<T>(&self, values: Vec<T>) -> crate::Result<()>
where where
T: TaskPayload + Serialize + std::fmt::Debug, T: TaskPayload + Serialize,
{ {
let mut tx = self.pool.begin().await?;
let mut builder: QueryBuilder<'_, Sqlite> = let mut builder: QueryBuilder<'_, Sqlite> =
QueryBuilder::new("insert into tasks(payload_key, payload, status_id)"); QueryBuilder::new("insert into tasks(payload_key, payload, status_id)");
@ -67,27 +63,22 @@ impl TaskManager {
.map(|value| Ok((value.get_key(), serde_json::to_string(value)?))) .map(|value| Ok((value.get_key(), serde_json::to_string(value)?)))
.collect(); .collect();
// FIXME: create chunk based insert by SQLITE_QUERY_LIMIT / N° of binds
let mut affected_rows = 0; builder.push_values(
// Chunk the query by the size limit of bind params args?.into_iter().take(SQLITE_QUERY_LIMIT / 3),
for chunk in args?.chunks(SQLITE_BIND_LIMIT / 3) { |mut builder, item| {
builder.push_values(chunk, |mut builder, item| {
builder builder
.push_bind(&item.0) .push_bind(item.0)
.push_bind(&item.1) .push_bind(item.1)
.push_bind(TaskStatus::Pending); .push_bind(TaskStatus::Pending);
}); },
);
builder.push("ON conflict (payload_key) DO NOTHING"); builder.push("ON conflict (payload_key) DO NOTHING");
let query = builder.build(); let query = builder.build();
affected_rows += query.execute(&mut *tx).await?.rows_affected(); query.execute(&self.pool).await?;
builder.reset();
}
tx.commit().await?;
info!("{} rows inserted.", affected_rows);
Ok(()) Ok(())
} }