From 65f8468daaa80e5e2127f2b5c1c2fe937fb0fcb4 Mon Sep 17 00:00:00 2001 From: Daniel Kempkens Date: Fri, 20 Oct 2023 18:23:07 +0200 Subject: [PATCH] fix: Improved importing of duplicate posts and comments --- lib/bdfr_browser/comment.ex | 9 ++++ lib/bdfr_browser/importer.ex | 86 +++++++++++++++++++----------------- lib/bdfr_browser/post.ex | 9 ++++ 3 files changed, 64 insertions(+), 40 deletions(-) diff --git a/lib/bdfr_browser/comment.ex b/lib/bdfr_browser/comment.ex index 863459b..6693afc 100644 --- a/lib/bdfr_browser/comment.ex +++ b/lib/bdfr_browser/comment.ex @@ -43,6 +43,15 @@ defmodule BdfrBrowser.Comment do ) end + def get_import(id) do + from(c in __MODULE__, + select: %{ + id: c.id + }, + where: c.id == ^id + ) + end + def search(str), do: search(str, nil) def search(str, subreddits) when is_nil(subreddits) do diff --git a/lib/bdfr_browser/importer.ex b/lib/bdfr_browser/importer.ex index b7787e0..eaf9a32 100644 --- a/lib/bdfr_browser/importer.ex +++ b/lib/bdfr_browser/importer.ex @@ -310,42 +310,46 @@ defmodule BdfrBrowser.Importer do defp import_post(post, subreddit) when not is_nil(subreddit) do id = post["id"] + db_post = id |> Post.get_import() |> Repo.one() - %Post{ - id: id, - title: post["title"], - selftext: post["selftext"], - url: post["url"], - permalink: post["permalink"], - author: post["author"], - upvote_ratio: post["upvote_ratio"], - posted_at: DateTime.from_unix!(trunc(post["created_utc"])), - filename: Path.basename(post["filename"], ".json"), - subreddit: subreddit - } - |> Repo.insert( - on_conflict: [set: [id: id]], - conflict_target: :id - ) + if is_nil(db_post) do + %Post{ + id: id, + title: post["title"], + selftext: post["selftext"], + url: post["url"], + permalink: post["permalink"], + author: post["author"], + upvote_ratio: post["upvote_ratio"], + posted_at: DateTime.from_unix!(trunc(post["created_utc"])), + filename: Path.basename(post["filename"], ".json"), + subreddit: subreddit + } + |> Repo.insert() + else + {:ok, db_post} + end end defp import_comment(comment, post, parent) when not is_nil(post) do id = comment["id"] + db_comment = id |> Comment.get_import() |> Repo.one() {:ok, parent} = - %Comment{ - id: id, - author: comment["author"], - body: comment["body"], - score: comment["score"], - posted_at: DateTime.from_unix!(trunc(comment["created_utc"])), - post: post, - parent: parent - } - |> Repo.insert( - on_conflict: [set: [id: id]], - conflict_target: :id - ) + if is_nil(db_comment) do + %Comment{ + id: id, + author: comment["author"], + body: comment["body"], + score: comment["score"], + posted_at: DateTime.from_unix!(trunc(comment["created_utc"])), + post: post, + parent: parent + } + |> Repo.insert() + else + {:ok, db_comment} + end children = for child <- comment["replies"], do: import_comment(child, post, parent) @@ -370,19 +374,21 @@ defmodule BdfrBrowser.Importer do id = calculate_message_id(message, chat.id) message_content = message["content"]["Message"] {:ok, posted_at, 0} = DateTime.from_iso8601(message["timestamp"]) + db_message = Repo.get(Message, id) {:ok, message_record} = - %Message{ - id: id, - author: message["author"], - message: message_content, - posted_at: posted_at, - chat: chat - } - |> Repo.insert( - on_conflict: [set: [id: id]], - conflict_target: :id - ) + if is_nil(db_message) do + %Message{ + id: id, + author: message["author"], + message: message_content, + posted_at: posted_at, + chat: chat + } + |> Repo.insert() + else + {:ok, db_message} + end existing_image = message_record.message == "Image" or diff --git a/lib/bdfr_browser/post.ex b/lib/bdfr_browser/post.ex index 3263dc1..c946b78 100644 --- a/lib/bdfr_browser/post.ex +++ b/lib/bdfr_browser/post.ex @@ -98,6 +98,15 @@ defmodule BdfrBrowser.Post do having(query, [p, c, s], count(c.id) > ^more_than) end + def get_import(id) do + from(p in __MODULE__, + select: %{ + id: p.id + }, + where: p.id == ^id + ) + end + def get_full(id) do from(p in __MODULE__, where: p.id == ^id,