fix: Chat image IDs

This commit is contained in:
Daniel Kempkens 2023-08-17 01:12:58 +02:00
parent e86a0f041b
commit 27cf234457
Signed by: daniel
SSH key fingerprint: SHA256:Ks/MyhQYcPRQiwMKLAKquWCdCPe3JXlb1WttgnAoSeM
3 changed files with 64 additions and 5 deletions

View file

@ -1,7 +1,7 @@
defmodule BdfrBrowser.HTTP.Plug do
use Plug.Router
alias BdfrBrowser.{Chat, Comment, Message, Repo, Post, Subreddit}
alias BdfrBrowser.{Chat, Comment, Importer, Message, Repo, Post, Subreddit}
plug :match
plug :dispatch
@ -150,15 +150,20 @@ defmodule BdfrBrowser.HTTP.Plug do
end
# POST /_import: asks the Importer to start a background import and replies
# with status 200 and the body "IMPORTING".
# Each `:ok = ...` match raises a MatchError if the call returns anything else.
post "/_import" do
# NOTE(review): the fully-qualified call and the aliased call below appear to
# target the same function (old and new line of the diff shown together);
# confirm that only one of them is meant to remain.
:ok = BdfrBrowser.Importer.background_import()
:ok = Importer.background_import()
send_resp(conn, 200, "IMPORTING")
end
# POST /_import_changes: calls `Importer.background_import_changes/0` and
# replies with status 200 and the body "IMPORTING CHANGES".
# Each `:ok = ...` match raises a MatchError if the call returns anything else.
post "/_import_changes" do
# NOTE(review): the fully-qualified call and the aliased call below appear to
# target the same function (old and new line of the diff shown together);
# confirm that only one of them is meant to remain.
:ok = BdfrBrowser.Importer.background_import_changes()
:ok = Importer.background_import_changes()
send_resp(conn, 200, "IMPORTING CHANGES")
end
# POST /_cleanup: runs `Importer.cleanup_messages/0` in the request process
# and replies with status 200 and the body "CLEANED UP" once it has returned.
post "/_cleanup" do
# Asserts the cleanup result: anything other than `:ok` raises a MatchError
# before a response is sent.
:ok = Importer.cleanup_messages()
send_resp(conn, 200, "CLEANED UP")
end
# GET /_ping: always answers with status 200 and the body "PONG".
get "/_ping", do: send_resp(conn, 200, "PONG")

View file

@ -5,6 +5,8 @@ defmodule BdfrBrowser.Importer do
alias BdfrBrowser.{Chat, Comment, Message, Post, Repo, Subreddit}
@image_extensions [".jpg", ".jpeg", ".gif", ".png", ".webp"]
defmodule State do
use TypedStruct
@ -77,6 +79,29 @@ defmodule BdfrBrowser.Importer do
List.flatten(result)
end
@doc """
Deletes chat messages that duplicate an already stored image message.

Every message returned by `Message.images/0` is checked against the other
messages of the same chat and timestamp (`Message.potential_duplicates/1`).
A candidate is deleted when it is an image placeholder (`"Image"`, `"image"`
or an `image…` file name with a known image extension), or when it is an
image reference stored under the ID derived from chat ID and timestamp only.

Always returns `:ok`, which is what the `/_cleanup` route matches on.
"""
def cleanup_messages do
  Message.images()
  |> Repo.all()
  |> Enum.flat_map(&duplicates_for_image/1)
  # The same row can be reported for several images that share a chat and a
  # timestamp. Deleting it twice would make the second `Repo.delete/1` raise
  # `Ecto.StaleEntryError`, so every row is kept only once.
  |> Enum.uniq_by(& &1.id)
  |> Enum.each(&Repo.delete/1)
end

# Returns the messages sharing chat and timestamp with `image` that are
# considered duplicates of it.
defp duplicates_for_image(image) do
  # ID computed from chat ID and timestamp only, i.e. without the message
  # content that `calculate_message_id/2` adds for image messages.
  incorrect_id =
    :sha3_256
    |> :crypto.hash([image.chat_id, DateTime.to_iso8601(image.posted_at)])
    |> Base.encode16(case: :lower)

  image
  |> Message.potential_duplicates()
  |> Repo.all()
  |> Enum.filter(fn msg ->
    msg.message == "Image" or
      msg.message == "image" or
      (msg.id == incorrect_id and
         String.starts_with?(msg.message, ["mxc://", "https://i.redd.it/"])) or
      (String.starts_with?(msg.message, "image") and
         String.ends_with?(msg.message, @image_extensions))
  end)
end
@doc """
Sends a `:background_import` cast to the process registered under this
module's name.

The cast is asynchronous, so this returns `:ok` without waiting for the
import to run.
"""
def background_import, do: GenServer.cast(__MODULE__, :background_import)
@ -326,7 +351,7 @@ defmodule BdfrBrowser.Importer do
end
defp import_message(message, chat) when not is_nil(chat) do
id = :sha3_256 |> :crypto.hash([chat.id, message["timestamp"]]) |> Base.encode16(case: :lower)
id = calculate_message_id(message, chat.id)
message_content = message["content"]["Message"]
{:ok, posted_at, 0} = DateTime.from_iso8601(message["timestamp"])
@ -343,7 +368,11 @@ defmodule BdfrBrowser.Importer do
conflict_target: :id
)
existing_image = message_record.message == "Image" or String.starts_with?(message_record.message, "image")
existing_image =
message_record.message == "Image" or
message_record.message == "image" or
(String.starts_with?(message_record.message, "image") and
String.ends_with?(message_record.message, @image_extensions))
message_record =
if existing_image and String.starts_with?(message_content, "mxc://") do
@ -355,4 +384,15 @@ defmodule BdfrBrowser.Importer do
message_record
end
# Derives the lowercase hex SHA3-256 ID for a chat message.
#
# Messages whose content starts with "mxc://" or "https://i.redd.it/" include
# the content in the hashed data; every other message is identified by chat ID
# and timestamp alone.
defp calculate_message_id(message, chat_id) do
  content = message["content"]["Message"]
  timestamp = message["timestamp"]

  hash_input =
    if String.starts_with?(content, ["mxc://", "https://i.redd.it/"]) do
      [chat_id, timestamp, content]
    else
      [chat_id, timestamp]
    end

  :sha3_256 |> :crypto.hash(hash_input) |> Base.encode16(case: :lower)
end
end

View file

@ -22,4 +22,18 @@ defmodule BdfrBrowser.Message do
order_by: [asc: m.posted_at]
)
end
@doc """
Builds a query for every message whose text starts with `mxc://` or
`https://i.redd.it/`, ordered by `posted_at` ascending.

Only the query is returned; running it is left to the caller.
"""
def images do
  from(msg in __MODULE__,
    where: like(msg.message, "mxc://%"),
    or_where: like(msg.message, "https://i.redd.it/%"),
    order_by: [asc: msg.posted_at]
  )
end
@doc """
Builds a query for all messages other than `other_m` that have the same
`chat_id` and `posted_at` as `other_m`, ordered by `posted_at` ascending.

Only the query is returned; running it is left to the caller.
"""
def potential_duplicates(other_m) do
  # Multiple `where:` entries are combined with AND.
  from(candidate in __MODULE__,
    where: candidate.id != ^other_m.id,
    where: candidate.chat_id == ^other_m.chat_id,
    where: candidate.posted_at == ^other_m.posted_at,
    order_by: [asc: candidate.posted_at]
  )
end
end