Skip to content

Commit c0f88c6

Browse files
committed
Implement channel merging.
This is a non-destructive fix for issue #4, but performance is terrible: using `channel = ANY(…)` brings Postgres to its knees because it stops using its index. Maybe we could try unrolling the array into an explicit OR list manually to see if that helps.
1 parent 19fb25f commit c0f88c6

File tree

8 files changed

+110
-31
lines changed

8 files changed

+110
-31
lines changed

ircj-serve/src/db.rs

+22-12
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1+
use itertools::Itertools;
12
use lazy_static::lazy_static;
23
use std::{collections::HashSet, str::FromStr};
34

4-
use crate::{ChannelInfo, Day};
5+
use crate::{ChannelInfo, ChannelRemap, Day};
56
use ircjournal::{
67
model::{Message, ServerChannel},
78
Database,
@@ -19,14 +20,16 @@ pub(crate) struct Paginated<U> {
1920
pub(crate) page_count: i64,
2021
}
2122

22-
pub(crate) async fn channels(db: &Database) -> Vec<ServerChannel> {
23+
/// Returns the known channels, with aliased channels folded into their canonical name.
///
/// Runs `all_channels()`, parses every returned name with `ServerChannel::from_str`
/// (names that fail to parse are dropped), maps each one through `remap.canonical`
/// and removes duplicates.
///
/// A failed query yields an empty list rather than an error.
///
/// # Panics
/// Panics if a returned row has a NULL `channel` (the `unwrap()` in the `filter_map`).
pub(crate) async fn channels(db: &Database, remap: &ChannelRemap) -> Vec<ServerChannel> {
2324
// language=sql
2425
sqlx::query!(r#"SELECT "channel" FROM all_channels()"#)
2526
.fetch_all(db)
2627
.await
2728
// A query error is swallowed here and turned into an empty result set.
.unwrap_or_default()
2829
.iter()
2930
.filter_map(|s| ServerChannel::from_str(s.channel.as_ref().unwrap()).ok())
31+
// Replace every old (aliased) name with the canonical channel it was merged into.
.map(|sc| remap.canonical(&sc))
32+
// Several old names can map to the same canonical channel; keep one entry per channel.
.unique()
3033
.collect()
3134
}
3235

@@ -45,18 +48,19 @@ pub(crate) async fn channel_exists(db: &Database, sc: &ServerChannel) -> bool {
4548
pub(crate) async fn channel_info(
4649
db: &Database,
4750
sc: &ServerChannel,
51+
remap: &ChannelRemap,
4852
before: &Day,
4953
) -> Option<ChannelInfo> {
50-
let channel = sc.to_string();
54+
let channels = remap.aliases_str(sc);
5155
// language=sql
5256
sqlx::query!(r#"
53-
WITH "ts" AS (SELECT min("timestamp") "first!", max("timestamp") "last!" FROM "message" WHERE "channel" = $1)
57+
WITH "ts" AS (SELECT min("timestamp") "first!", max("timestamp") "last!" FROM "message" WHERE "channel" = ANY($1))
5458
SELECT "first!", "last!", array(SELECT "nick" FROM all_nicks($1, $2)) "nicks!",
5559
(SELECT row("message".*) FROM "message"
56-
WHERE "channel" = $1 AND "opcode" = 'topic' AND coalesce("payload", '') != '' AND "timestamp" < $3
60+
WHERE "channel" = ANY($1) AND "opcode" = 'topic' AND coalesce("payload", '') != '' AND "timestamp" < $3
5761
ORDER BY "timestamp" DESC LIMIT 1) "topic?:Message"
5862
FROM "ts" GROUP BY 1, 2, 3 LIMIT 1
59-
"#, &channel, HARD_NICK_LIMIT as i64, before.succ().midnight())
63+
"#, &channels, HARD_NICK_LIMIT as i64, before.succ().midnight())
6064
.fetch_optional(db)
6165
.await
6266
.unwrap()
@@ -72,18 +76,20 @@ pub(crate) async fn channel_info(
7276
pub(crate) async fn messages_channel_day(
7377
db: &Database,
7478
sc: &ServerChannel,
79+
remap: &ChannelRemap,
7580
day: &Day,
7681
) -> Vec<Message> {
82+
let channels = remap.aliases_str(sc);
7783
// language=sql
7884
sqlx::query_as!(
7985
Message,
8086
r#"
8187
SELECT * FROM "message"
82-
WHERE "channel" = $1 AND "timestamp" >= $2 AND "timestamp" < $3
88+
WHERE "channel" = ANY($1) AND "timestamp" >= $2 AND "timestamp" < $3
8389
ORDER BY "timestamp"
8490
LIMIT $4
8591
"#,
86-
sc.to_string(),
92+
&channels,
8793
day.midnight(),
8894
day.succ().midnight(),
8995
HARD_MESSAGE_LIMIT as i64
@@ -96,20 +102,22 @@ pub(crate) async fn messages_channel_day(
96102
pub(crate) async fn channel_month_index(
97103
db: &Database,
98104
sc: &ServerChannel,
105+
remap: &ChannelRemap,
99106
year: i32,
100107
month: u32,
101108
) -> HashSet<u32> {
109+
let channels = remap.aliases_str(sc);
102110
let from: Day = chrono::NaiveDate::from_ymd(year, month, 1).into();
103111
let to: Day = chrono::NaiveDate::from_ymd(year + month as i32 / 12, 1 + month % 12, 1).into();
104112
// language=sql
105113
sqlx::query!(
106114
r#"
107115
SELECT DISTINCT EXTRACT(DAY FROM "timestamp") "day!"
108116
FROM "message"
109-
WHERE "channel" = $1 AND ("opcode" IS NULL OR "opcode" = 'me')
117+
WHERE "channel" = ANY($1) AND ("opcode" IS NULL OR "opcode" = 'me')
110118
AND "timestamp" >= $2 AND "timestamp" < $3
111119
"#,
112-
sc.to_string(),
120+
&channels,
113121
from.midnight(),
114122
to.midnight()
115123
)
@@ -124,10 +132,12 @@ pub(crate) async fn channel_month_index(
124132
pub(crate) async fn channel_search(
125133
db: &Database,
126134
sc: &ServerChannel,
135+
remap: &ChannelRemap,
127136
query: &str,
128137
page: i64,
129138
) -> Paginated<Message> {
130139
// Try to find nick:<something> to build a non-empty nick filter.
140+
let channels = remap.aliases_str(sc);
131141
lazy_static! {
132142
static ref NICK: regex::Regex =
133143
regex::Regex::new(r#"\b(nick:[A-Za-z_0-9|.`\*-]+)"#).unwrap();
@@ -162,14 +172,14 @@ pub(crate) async fn channel_search(
162172
SELECT row("message".*) "message!:Message",
163173
ts_headline('english', "line", plainto_tsquery('english', $2), U&'StartSel=\E000, StopSel=\E001') "headline!"
164174
FROM "message"
165-
WHERE "channel" || '' = $1
175+
WHERE "channel" || '' = ANY($1)
166176
AND coalesce("opcode", '') = ''
167177
AND CASE WHEN $2 = '' THEN TRUE ELSE to_tsvector('english', "nick" || ' ' || "line") @@ plainto_tsquery('english', $2) END
168178
AND CASE WHEN $5 = '' THEN TRUE ELSE "nick" LIKE $5 END
169179
)
170180
SELECT *, COUNT(*) OVER () "total!"
171181
FROM "query" t LIMIT $3 OFFSET $4
172-
"#, sc.to_string(), &query, per_page, offset, nick_filter)
182+
"#, &channels, &query, per_page, offset, nick_filter)
173183
.fetch_all(db)
174184
.await
175185
.unwrap();

ircj-serve/src/lib.rs

+40-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@ extern crate rocket;
33

44
use chrono::{Datelike, NaiveDate};
55
use ircjournal::model::{Datetime, Message, ServerChannel};
6-
use std::{collections::HashSet, str::FromStr};
6+
use std::{
7+
collections::{HashMap, HashSet},
8+
str::FromStr,
9+
};
710

811
mod db;
912
pub mod route;
@@ -14,6 +17,42 @@ pub mod watch;
1417

1518
pub(crate) type Nicks = HashSet<String>;
1619

20+
/// Alias table used to merge channels: old channel names are mapped onto a
/// canonical (new) channel, and a canonical channel can be expanded back into
/// the old names it replaces.
#[derive(Clone)]
21+
pub struct ChannelRemap {
22+
// Old name -> canonical (new) name.
forward: HashMap<ServerChannel, ServerChannel>,
23+
// Canonical (new) name -> every old name listed for it.
reverse: HashMap<ServerChannel, Vec<ServerChannel>>,
24+
}
25+
26+
impl ChannelRemap {
27+
/// Builds the remap table from `aliases`, where each key is a canonical (new)
/// channel name and each value is the list of old names merged into it.
///
/// Every name is parsed with `ServerChannel::from_str`.
///
/// # Errors
/// Returns the first parse failure; no table is built in that case.
pub fn new(aliases: &HashMap<String, Vec<String>>) -> std::io::Result<Self> {
28+
let mut reverse = HashMap::with_capacity(aliases.len());
29+
let mut forward = HashMap::with_capacity(aliases.len());
30+
for (new, olds) in aliases {
31+
let new = ServerChannel::from_str(new)?;
32+
let mut o = Vec::with_capacity(olds.len());
33+
for old in olds {
34+
let old = ServerChannel::from_str(old)?;
35+
// NOTE(review): if the same old name is listed under two canonical names, the
// later `insert` overwrites the earlier one, and `HashMap` iteration order is
// unspecified — confirm whether the configuration should reject that case.
forward.insert(old.clone(), new.clone());
36+
// NOTE(review): `old` is not used after this push, so the clone looks redundant.
o.push(old.clone());
37+
}
38+
reverse.insert(new.clone(), o);
39+
}
40+
Ok(Self { forward, reverse })
41+
}
42+
43+
/// Returns the canonical channel for `sc`: the channel it is an alias of, or a
/// clone of `sc` itself when it has no `forward` entry.
pub(crate) fn canonical(&self, sc: &ServerChannel) -> ServerChannel {
44+
self.forward.get(sc).unwrap_or(sc).clone()
45+
}
46+
47+
/// Returns the string forms of `sc` followed by all of its old names.
///
/// `sc` itself is always the first element. The old names are only appended when
/// `sc` is a canonical channel (a key of `reverse`); for any other channel the
/// result contains just `sc`.
pub(crate) fn aliases_str(&self, sc: &ServerChannel) -> Vec<String> {
48+
let mut out = vec![sc.to_string()];
49+
if let Some(x) = self.reverse.get(sc) {
50+
x.iter().map(|sc| sc.to_string()).for_each(|s| out.push(s));
51+
}
52+
out
53+
}
54+
}
55+
1756
#[derive(Debug)]
1857
pub struct ChannelInfo {
1958
pub(crate) sc: ServerChannel,

ircj-serve/src/main.rs

+10-1
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,20 @@ extern crate rocket;
22

33
use figment::providers::Format;
44
use rocket::fairing::AdHoc;
5+
use std::collections::HashMap;
56

67
/// Server configuration, extracted from the figment providers in `get_rocket`.
#[derive(Debug, serde::Deserialize, serde::Serialize)]
78
pub struct Config {
89
// NOTE(review): presumably the database connection URI — confirm against the code that opens the connection.
pub db: String,
10+
// Channel aliases handed to `ChannelRemap::new`: each key is a canonical (new)
// channel name, each value the list of old names merged into it.
pub aliases: HashMap<String, Vec<String>>,
911
}
1012

1113
impl Default for Config {
1214
fn default() -> Self {
13-
Self { db: "".to_owned() }
15+
Self {
16+
db: "".to_owned(),
17+
aliases: Default::default(),
18+
}
1419
}
1520
}
1621

@@ -23,8 +28,12 @@ async fn get_rocket() -> rocket::Rocket<rocket::Build> {
2328
.merge(figment::providers::Toml::file("ircj-serve.toml"))
2429
.merge(figment::providers::Env::prefixed("IRCJ_"));
2530

31+
let config = figment.extract::<Config>().unwrap();
32+
let remap = ircj_serve::ChannelRemap::new(&config.aliases).unwrap();
33+
2634
rocket::custom(figment)
2735
.attach(AdHoc::config::<Config>())
36+
.manage(remap)
2837
.attach(AdHoc::on_ignite(
2938
"Connect to database and migrate",
3039
|rocket| async move {

ircj-serve/src/route.rs

+26-11
Original file line numberDiff line numberDiff line change
@@ -17,16 +17,21 @@ use tokio::{
1717
pub use crate::route_static::StaticFiles;
1818
use ircjournal::{model::ServerChannel, Database, MessageEvent};
1919

20-
use crate::{view, Day};
20+
use crate::{view, ChannelRemap, Day};
2121

2222
#[get("/")]
23-
async fn home(db: &State<Database>) -> Option<Markup> {
24-
let channels = crate::db::channels(db).await;
23+
async fn home(db: &State<Database>, remap: &State<ChannelRemap>) -> Option<Markup> {
24+
let channels = crate::db::channels(db, remap).await;
2525
Some(view::home(&channels))
2626
}
2727

2828
#[get("/<sc>")]
29-
async fn channel_redirect(db: &State<Database>, sc: ServerChannel) -> Redirect {
29+
async fn channel_redirect(
30+
db: &State<Database>,
31+
remap: &State<ChannelRemap>,
32+
sc: ServerChannel,
33+
) -> Redirect {
34+
let sc = remap.canonical(&sc);
3035
Redirect::temporary(
3136
if let Some(ts) = ircjournal::db::last_message_ts(db, &sc).await {
3237
uri!(channel(&sc, ts.into()))
@@ -38,11 +43,13 @@ async fn channel_redirect(db: &State<Database>, sc: ServerChannel) -> Redirect {
3843

3944
#[get("/<sc>/stream")]
4045
async fn channel_stream(
41-
sc: ServerChannel,
4246
db: &State<Database>,
47+
remap: &State<ChannelRemap>,
4348
queue: &State<Sender<MessageEvent>>,
49+
sc: ServerChannel,
4450
mut end: rocket::Shutdown,
4551
) -> Option<EventStream![]> {
52+
let sc = remap.canonical(&sc);
4653
if !crate::db::channel_exists(db, &sc).await {
4754
return None;
4855
}
@@ -63,12 +70,18 @@ async fn channel_stream(
6370
}
6471

6572
#[get("/<sc>/<day>")]
66-
async fn channel(db: &State<Database>, sc: ServerChannel, day: Day) -> Option<Markup> {
73+
async fn channel(
74+
db: &State<Database>,
75+
remap: &State<ChannelRemap>,
76+
sc: ServerChannel,
77+
day: Day,
78+
) -> Option<Markup> {
79+
let sc = remap.canonical(&sc);
6780
let (messages, info, active_days) = {
6881
tokio::join!(
69-
crate::db::messages_channel_day(db, &sc, &day),
70-
crate::db::channel_info(db, &sc, &day),
71-
crate::db::channel_month_index(db, &sc, day.0.year(), day.0.month()),
82+
crate::db::messages_channel_day(db, &sc, remap, &day),
83+
crate::db::channel_info(db, &sc, remap, &day),
84+
crate::db::channel_month_index(db, &sc, remap, day.0.year(), day.0.month()),
7285
)
7386
};
7487
let truncated = messages.len() == crate::db::HARD_MESSAGE_LIMIT;
@@ -84,17 +97,19 @@ async fn channel(db: &State<Database>, sc: ServerChannel, day: Day) -> Option<Ma
8497
#[get("/<sc>/search?<query>&<page>")]
8598
async fn channel_search(
8699
db: &State<Database>,
100+
remap: &State<ChannelRemap>,
87101
sc: ServerChannel,
88102
query: &str,
89103
page: Option<u64>,
90104
) -> Option<Markup> {
105+
let sc = remap.canonical(&sc);
91106
let page = page.unwrap_or(1);
92107
let (result_page, info) = {
93108
let query = query.to_string();
94109
let today = Day::today();
95110
tokio::join!(
96-
crate::db::channel_search(db, &sc, &query, page as i64),
97-
crate::db::channel_info(db, &sc, &today),
111+
crate::db::channel_search(db, &sc, remap, &query, page as i64),
112+
crate::db::channel_info(db, &sc, remap, &today),
98113
)
99114
};
100115
let messages: Vec<_> = result_page

ircj-serve/src/watch.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use rocket::fairing::AdHoc;
22
use std::{str::FromStr, time::Duration};
33
use tokio::sync::broadcast;
44

5+
use crate::ChannelRemap;
56
use ircjournal::{
67
model::{Message, ServerChannel},
78
Database, MessageEvent,
@@ -12,6 +13,7 @@ const AWAKE_LISTEN_INTERVAL: Duration = Duration::from_secs(60);
1213

1314
pub fn broadcast_message_task(
1415
db: Database,
16+
remap: ChannelRemap,
1517
broadcast: broadcast::Sender<MessageEvent>,
1618
mut shutdown: rocket::Shutdown,
1719
) {
@@ -29,7 +31,8 @@ pub fn broadcast_message_task(
2931
Ok(notification) = listener.recv() => {
3032
if let Ok(message) = serde_json::from_str::<Message>(notification.payload()) {
3133
let sc = ServerChannel::from_str(message.channel.as_ref().unwrap()).unwrap();
32-
let nicks = crate::db::channel_info(&db, &sc, &message.timestamp.into()).await
34+
let sc = remap.canonical(&sc);
35+
let nicks = crate::db::channel_info(&db, &sc, &remap, &message.timestamp.into()).await
3336
.map(|info| info.nicks).unwrap_or_default();
3437
let _ = broadcast.send((sc.clone(), crate::view::formatted_message(&message, &nicks)));
3538
debug!("New message for {:?}, id {}", &sc, message.id);
@@ -56,6 +59,7 @@ fn watch_fairing() -> AdHoc {
5659
.state::<Database>()
5760
.unwrap() // attached above
5861
.clone(),
62+
rocket.state::<ChannelRemap>().unwrap().clone(),
5963
rocket
6064
.state::<broadcast::Sender<MessageEvent>>()
6165
.unwrap() // attached above

ircjournal/migrations/20210915165747_initial.sql

+3-3
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ CREATE INDEX "channel_ts" ON "message" ("channel", "timestamp");
1919
CREATE INDEX "channel_line_fts" ON "message" USING gin (channel, to_tsvector('english', nick || ' ' || line));
2020

2121
-- https://wiki.postgresql.org/wiki/Loose_indexscan
22-
CREATE OR REPLACE FUNCTION all_nicks(chan text, n numeric)
22+
CREATE OR REPLACE FUNCTION all_nicks(chan text[], n numeric)
2323
RETURNS TABLE
2424
(
2525
nick text
@@ -29,9 +29,9 @@ $$
2929
WITH RECURSIVE t AS (
3030
SELECT min(nick) AS nick, 1 AS cnt
3131
FROM message
32-
WHERE channel = chan AND 1 <= n
32+
WHERE channel = ANY(chan) AND 1 <= n
3333
UNION ALL
34-
SELECT (SELECT min(nick) FROM message WHERE nick > t.nick AND channel = chan), cnt + 1 AS cnt
34+
SELECT (SELECT min(nick) FROM message WHERE nick > t.nick AND channel = ANY(chan)), cnt + 1 AS cnt
3535
FROM t
3636
WHERE t.nick IS NOT NULL AND cnt < n
3737
)

ircjournal/src/db.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
use std::time::Duration;
21
use crate::{
32
model::{Datetime, NewMessage, ServerChannel},
43
Database,
54
};
5+
use std::time::Duration;
66

77
pub async fn create_db(uri: &str) -> Result<Database, sqlx::Error> {
88
// TODO: configurable options.

ircjournal/src/model.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,14 @@ use std::io::ErrorKind;
66

77
pub type Datetime = chrono::DateTime<chrono::Utc>;
88

9-
#[derive(PartialEq, Clone, Debug)]
9+
/// A channel identified by its server and channel name.
///
/// `Hash` is derived (together with the `Eq` impl that accompanies this type) so
/// that a `ServerChannel` can be used as a `HashMap` key.
#[derive(PartialEq, Hash, Clone, Debug)]
1010
pub struct ServerChannel {
1111
pub server: String,
1212
pub channel: String,
1313
}
1414

15+
impl Eq for ServerChannel {}
16+
1517
#[derive(PartialEq, Debug, serde::Deserialize, sqlx::Type)]
1618
pub struct Message {
1719
pub id: i32,

0 commit comments

Comments
 (0)