feat: speed up imports

Daniel Kempkens 2023-08-22 00:32:38 +02:00
parent 529161f21c
commit 956f356a2c
Signed by: daniel
SSH key fingerprint: SHA256:Ks/MyhQYcPRQiwMKLAKquWCdCPe3JXlb1WttgnAoSeM

@@ -14,6 +14,7 @@ defmodule BdfrBrowser.Importer do
       field :fs_pid, pid
       field :post_changes, [Path.t()], default: MapSet.new()
       field :chat_changes, [Path.t()], default: MapSet.new()
+      field :last_import, non_neg_integer()
     end
   end
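The new last_import field stores the Unix timestamp (in seconds) of the previous background import. It is declared without a default, so a fresh State starts with last_import set to nil, which the import code below treats as "no previous import" and falls back to a full scan. A minimal sketch, assuming the struct is built with the typed_struct library (suggested by the surrounding field calls):

  # Sketch only: a TypedStruct field without `default:` starts out as nil.
  defmodule DemoState do
    use TypedStruct

    typedstruct do
      field :last_import, non_neg_integer()
    end
  end

  %DemoState{}.last_import
  # => nil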
@@ -37,7 +38,7 @@ defmodule BdfrBrowser.Importer do
     end
   end
 
-  def posts_and_comments do
+  def posts_and_comments(last_import \\ nil) do
     _ = Logger.info("Importing posts and comments ...")
 
     result =
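Giving posts_and_comments/1 a default of nil keeps the old call shape working: Elixir compiles the \\ nil default into both a zero-argument and a one-argument clause, and a nil argument means "import everything". A small illustrative sketch (the module name is made up):

  # Hypothetical demo of the default-argument behaviour used above.
  defmodule DefaultArgDemo do
    def run(last_import \\ nil) do
      if is_nil(last_import), do: :full_import, else: {:incremental_since, last_import}
    end
  end

  DefaultArgDemo.run()
  # => :full_import
  DefaultArgDemo.run(System.os_time(:second))
  # => {:incremental_since, ...} (value depends on the current time)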
@@ -49,7 +50,7 @@ defmodule BdfrBrowser.Importer do
       for date <- list_folders(paths: [subreddit]) do
         _ = Logger.debug("Importing entries from `#{subreddit}' on `#{date}' ...")
 
-        for post <- read_posts(paths: [subreddit, date], ext: ".json") do
+        for post <- read_posts(paths: [subreddit, date], ext: ".json", last_import: last_import) do
           _ = Logger.debug("Importing `#{post["id"]}' from `#{subreddit}' ...")
           {:ok, post_record} = import_post(post, subreddit_record)
@@ -142,11 +143,11 @@ defmodule BdfrBrowser.Importer do
   end
 
   @impl true
-  def handle_cast(:background_import, state) do
+  def handle_cast(:background_import, %State{last_import: last_import} = state) do
     _ = subreddits()
-    _ = posts_and_comments()
+    _ = posts_and_comments(last_import)
     _ = chats()
-    {:noreply, state}
+    {:noreply, %State{state | last_import: System.os_time(:second)}}
   end
 
   @impl true
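After a background import finishes, the handler stamps the state with System.os_time(:second), an integer Unix timestamp in the same unit that File.stat/2 reports modification times in when called with time: :posix, so the two values compare directly on the next run. A hedged usage sketch, assuming the importer GenServer is registered under its module name (common, but not shown in this diff):

  # Assumed process name; the first cast does a full import, later casts only
  # pick up files whose mtime is newer than the recorded timestamp.
  GenServer.cast(BdfrBrowser.Importer, :background_import)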
@@ -239,6 +240,7 @@ defmodule BdfrBrowser.Importer do
   defp read_posts(args) do
     posts = list_folders(args)
     sort = Keyword.get(args, :sort, :desc)
+    last_import = Keyword.get(args, :last_import)
     base_directory = Application.fetch_env!(:bdfr_browser, :base_directory)
     post_dir = Path.join([base_directory | Keyword.fetch!(args, :paths)])
 
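Keyword.get/2 returns nil when the key is missing, so read_posts/1 callers that do not pass :last_import (and posts_and_comments/1 when invoked without an argument) keep the original parse-everything behaviour. For example:

  # Keyword.get/2 defaults to nil for absent keys.
  Keyword.get([paths: ["sub", "2023-08"], ext: ".json"], :last_import)
  # => nil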
@@ -246,11 +248,25 @@ defmodule BdfrBrowser.Importer do
     parsed_posts =
       for post <- posts do
         file_path = Path.join([post_dir, post])
-        parsed = file_path |> File.read!() |> Jason.decode!()
-        Map.put(parsed, "filename", post)
+
+        if is_nil(last_import) do
+          parsed = file_path |> File.read!() |> Jason.decode!()
+          Map.put(parsed, "filename", post)
+        else
+          {:ok, info} = File.stat(file_path, time: :posix)
+
+          if info.mtime > last_import do
+            parsed = file_path |> File.read!() |> Jason.decode!()
+            Map.put(parsed, "filename", post)
+          else
+            nil
+          end
+        end
       end
 
-    Enum.sort_by(parsed_posts, fn p -> p["created_utc"] end, sort)
+    parsed_posts
+    |> Enum.reject(&is_nil/1)
+    |> Enum.sort_by(fn p -> p["created_utc"] end, sort)
   end
 
   defp read_chats(args) do
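The speed-up comes from this branch: when a last_import timestamp is present, each post file is first checked with File.stat/2 using time: :posix, which reports mtime as integer Unix seconds; only files modified after the previous import are read and JSON-decoded, everything else becomes nil and is dropped by Enum.reject/2 before sorting. A standalone sketch of the filtering idea (the path and cutoff are illustrative only):

  # Hedged sketch of the mtime filter; not taken verbatim from the commit.
  last_import = System.os_time(:second) - 3600
  file_path = "/tmp/example_post.json"

  {:ok, info} = File.stat(file_path, time: :posix)

  if info.mtime > last_import do
    file_path |> File.read!() |> Jason.decode!()
  else
    nil
  end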