fix: Improved importing of duplicate posts and comments
All checks were successful
Build / build (push) Successful in 4m10s

This commit is contained in:
Daniel Kempkens 2023-10-20 18:23:07 +02:00
parent d7bf17fa34
commit 65f8468daa
Signed by: daniel
SSH key fingerprint: SHA256:Ks/MyhQYcPRQiwMKLAKquWCdCPe3JXlb1WttgnAoSeM
3 changed files with 64 additions and 40 deletions

View file

@ -43,6 +43,15 @@ defmodule BdfrBrowser.Comment do
) )
end end
@doc """
Builds a query that matches the comment whose `id` column equals `id`.

Only the `id` column is selected, so a matching row is returned as a plain
`%{id: id}` map rather than a full schema struct. The query is only
constructed here; running it is left to the caller.
"""
def get_import(id) do
  from(comment in __MODULE__,
    where: comment.id == ^id,
    select: %{id: comment.id}
  )
end
def search(str), do: search(str, nil) def search(str), do: search(str, nil)
def search(str, subreddits) when is_nil(subreddits) do def search(str, subreddits) when is_nil(subreddits) do

View file

@ -310,42 +310,46 @@ defmodule BdfrBrowser.Importer do
defp import_post(post, subreddit) when not is_nil(subreddit) do defp import_post(post, subreddit) when not is_nil(subreddit) do
id = post["id"] id = post["id"]
db_post = id |> Post.get_import() |> Repo.one()
%Post{ if is_nil(db_post) do
id: id, %Post{
title: post["title"], id: id,
selftext: post["selftext"], title: post["title"],
url: post["url"], selftext: post["selftext"],
permalink: post["permalink"], url: post["url"],
author: post["author"], permalink: post["permalink"],
upvote_ratio: post["upvote_ratio"], author: post["author"],
posted_at: DateTime.from_unix!(trunc(post["created_utc"])), upvote_ratio: post["upvote_ratio"],
filename: Path.basename(post["filename"], ".json"), posted_at: DateTime.from_unix!(trunc(post["created_utc"])),
subreddit: subreddit filename: Path.basename(post["filename"], ".json"),
} subreddit: subreddit
|> Repo.insert( }
on_conflict: [set: [id: id]], |> Repo.insert()
conflict_target: :id else
) {:ok, db_post}
end
end end
defp import_comment(comment, post, parent) when not is_nil(post) do defp import_comment(comment, post, parent) when not is_nil(post) do
id = comment["id"] id = comment["id"]
db_comment = id |> Comment.get_import() |> Repo.one()
{:ok, parent} = {:ok, parent} =
%Comment{ if is_nil(db_comment) do
id: id, %Comment{
author: comment["author"], id: id,
body: comment["body"], author: comment["author"],
score: comment["score"], body: comment["body"],
posted_at: DateTime.from_unix!(trunc(comment["created_utc"])), score: comment["score"],
post: post, posted_at: DateTime.from_unix!(trunc(comment["created_utc"])),
parent: parent post: post,
} parent: parent
|> Repo.insert( }
on_conflict: [set: [id: id]], |> Repo.insert()
conflict_target: :id else
) {:ok, db_comment}
end
children = for child <- comment["replies"], do: import_comment(child, post, parent) children = for child <- comment["replies"], do: import_comment(child, post, parent)
@ -370,19 +374,21 @@ defmodule BdfrBrowser.Importer do
id = calculate_message_id(message, chat.id) id = calculate_message_id(message, chat.id)
message_content = message["content"]["Message"] message_content = message["content"]["Message"]
{:ok, posted_at, 0} = DateTime.from_iso8601(message["timestamp"]) {:ok, posted_at, 0} = DateTime.from_iso8601(message["timestamp"])
db_message = Repo.get(Message, id)
{:ok, message_record} = {:ok, message_record} =
%Message{ if is_nil(db_message) do
id: id, %Message{
author: message["author"], id: id,
message: message_content, author: message["author"],
posted_at: posted_at, message: message_content,
chat: chat posted_at: posted_at,
} chat: chat
|> Repo.insert( }
on_conflict: [set: [id: id]], |> Repo.insert()
conflict_target: :id else
) {:ok, db_message}
end
existing_image = existing_image =
message_record.message == "Image" or message_record.message == "Image" or

View file

@ -98,6 +98,15 @@ defmodule BdfrBrowser.Post do
having(query, [p, c, s], count(c.id) > ^more_than) having(query, [p, c, s], count(c.id) > ^more_than)
end end
@doc """
Builds a query that matches the post whose `id` column equals `id`.

Only the `id` column is selected, so a matching row is returned as a plain
`%{id: id}` map rather than a full schema struct. The query is only
constructed here; running it is left to the caller.
"""
def get_import(id) do
  from(post in __MODULE__,
    where: post.id == ^id,
    select: %{id: post.id}
  )
end
def get_full(id) do def get_full(id) do
from(p in __MODULE__, from(p in __MODULE__,
where: p.id == ^id, where: p.id == ^id,