From 23a1d6a9d65cfa9c85f279787ddb5b4350fbb68b Mon Sep 17 00:00:00 2001
From: Daniel Kempkens
Date: Mon, 14 Aug 2023 15:05:27 +0200
Subject: [PATCH] feat: Support importing and viewing chats

Adds the `chats` and `messages` tables with their Ecto schemas, imports
chat exports (`.json` files and older `.json_lines` files) from the
directory configured via BDFR_BROWSER_CHAT_DIRECTORY as part of the
background import, and serves them under /chats and /chats/:id.

Review fixes folded into this patch:
- /chats/:id answers 404 for an unknown chat id instead of crashing on nil
- re-importing a chat refreshes its `accounts` list (was a no-op upsert)
- message timestamps with a non-zero UTC offset no longer crash the import
- blank lines in `.json_lines` files are skipped instead of failing to decode
---
 config/runtime.exs                            |   1 +
 lib/bdfr_browser/chat.ex                      |  24 +++++
 lib/bdfr_browser/http/plug.ex                 |  32 +++++-
 lib/bdfr_browser/importer.ex                  | 101 +++++++++++++++++-
 lib/bdfr_browser/message.ex                   |  25 +++++
 .../20230814110852_create_chats.exs           |  21 ++++
 priv/templates/http/chat.eex                  |  18 ++++
 priv/templates/http/chats.eex                 |  16 ++++
 priv/templates/http/index.eex                 |   2 +
 9 files changed, 237 insertions(+), 3 deletions(-)
 create mode 100644 lib/bdfr_browser/chat.ex
 create mode 100644 lib/bdfr_browser/message.ex
 create mode 100644 priv/repo/migrations/20230814110852_create_chats.exs
 create mode 100644 priv/templates/http/chat.eex
 create mode 100644 priv/templates/http/chats.eex

diff --git a/config/runtime.exs b/config/runtime.exs
index b2bf4a8..4379560 100644
--- a/config/runtime.exs
+++ b/config/runtime.exs
@@ -2,6 +2,7 @@ import Config
 
 config :bdfr_browser,
   base_directory: System.get_env("BDFR_BROWSER_BASE_DIRECTORY", "/nonexistant"),
+  chat_directory: System.get_env("BDFR_BROWSER_CHAT_DIRECTORY", "/nonexistant"),
   http_ip: to_charlist(System.get_env("BDFR_BROWSER_HTTP_IP", "127.0.0.1")),
   http_port: String.to_integer(System.get_env("BDFR_BROWSER_HTTP_PORT", "4040"))
 
diff --git a/lib/bdfr_browser/chat.ex b/lib/bdfr_browser/chat.ex
new file mode 100644
index 0000000..3577c5d
--- /dev/null
+++ b/lib/bdfr_browser/chat.ex
@@ -0,0 +1,24 @@
+defmodule BdfrBrowser.Chat do
+  use Ecto.Schema
+
+  import Ecto.Query, only: [from: 2]
+
+  alias BdfrBrowser.Message
+
+  @primary_key {:id, :string, autogenerate: false}
+
+  schema "chats" do
+    field :accounts, {:array, :string}
+
+    has_many :messages, Message
+  end
+
+  def listing do
+    from(c in __MODULE__,
+      left_join: m in assoc(c, :messages),
+      select: %{id: c.id, accounts: c.accounts, num_messages: count(m.id), latest_message: max(m.posted_at)},
+      order_by: [desc: max(m.posted_at)],
+      group_by: c.id
+    )
+  end
+end
diff --git a/lib/bdfr_browser/http/plug.ex b/lib/bdfr_browser/http/plug.ex
index ab94ad5..7165bf0 100644
--- a/lib/bdfr_browser/http/plug.ex
+++ b/lib/bdfr_browser/http/plug.ex
@@ -1,7 +1,7 @@
 defmodule BdfrBrowser.HTTP.Plug do
   use Plug.Router
 
-  alias BdfrBrowser.{Repo, Post, Subreddit}
+  alias BdfrBrowser.{Chat, Message, Repo, Post, Subreddit}
 
   plug :match
   plug :dispatch
@@ -64,6 +64,36 @@ defmodule BdfrBrowser.HTTP.Plug do
     |> send_resp(200, content)
   end
 
+  get "/chats" do
+    tpl_args = [chats: Chat.listing() |> Repo.all()]
+    content = render_template("chats", tpl_args)
+
+    conn
+    |> put_resp_header("content-type", "text/html; charset=utf-8")
+    |> send_resp(200, content)
+  end
+
+  get "/chats/:id" do
+    # Repo.get/2 returns nil for an unknown id; answer 404 rather than
+    # crashing on `nil.id` inside Message.listing/1.
+    case Repo.get(Chat, id) do
+      nil ->
+        send_resp(conn, 404, "Not Found")
+
+      chat_record ->
+        tpl_args = [
+          chat: chat_record,
+          messages: chat_record |> Message.listing() |> Repo.all()
+        ]
+
+        content = render_template("chat", tpl_args)
+
+        conn
+        |> put_resp_header("content-type", "text/html; charset=utf-8")
+        |> send_resp(200, content)
+    end
+  end
+
   get "/static/*path" do
     file_path = Application.app_dir(:bdfr_browser, Path.join("priv/static", path))
 
diff --git a/lib/bdfr_browser/importer.ex b/lib/bdfr_browser/importer.ex
index 4b47401..3b781d7 100644
--- a/lib/bdfr_browser/importer.ex
+++ b/lib/bdfr_browser/importer.ex
@@ -3,7 +3,7 @@ defmodule BdfrBrowser.Importer do
 
   use GenServer
 
-  alias BdfrBrowser.{Comment, Post, Repo, Subreddit}
+  alias BdfrBrowser.{Chat, Comment, Message, Post, Repo, Subreddit}
 
   def start_link([]) do
     GenServer.start_link(__MODULE__, [], name: __MODULE__)
@@ -49,6 +49,22 @@ defmodule BdfrBrowser.Importer do
     List.flatten(result)
   end
 
+  def chats do
+    _ = Logger.info("Importing chats ...")
+
+    result =
+      for chat <- read_chats(directory_key: :chat_directory) do
+        _ = Logger.info("Importing chat `#{chat["id"]}' ...")
+
+        {:ok, chat_record} = import_chat(chat)
+        message_records = for message <- chat["messages"], do: import_message(message, chat_record)
+
+        {chat_record, List.flatten(message_records)}
+      end
+
+    List.flatten(result)
+  end
+
   def background_import do
     GenServer.cast(__MODULE__, :background_import)
   end
@@ -64,6 +80,7 @@ defmodule BdfrBrowser.Importer do
   def handle_cast(:background_import, state) do
     _ = subreddits()
     _ = posts_and_comments()
+    _ = chats()
     {:noreply, state}
   end
 
@@ -73,7 +90,8 @@ defmodule BdfrBrowser.Importer do
     paths = Keyword.get(args, :paths, [])
     extname = Keyword.get(args, :ext, "")
     sort = Keyword.get(args, :sort, :desc)
-    base_directory = Application.fetch_env!(:bdfr_browser, :base_directory)
+    directory_key = Keyword.get(args, :directory_key, :base_directory)
+    base_directory = Application.fetch_env!(:bdfr_browser, directory_key)
 
     [base_directory | paths]
     |> Path.join()
@@ -99,6 +117,47 @@ defmodule BdfrBrowser.Importer do
     Enum.sort_by(parsed_posts, fn p -> p["created_utc"] end, sort)
   end
 
+  defp read_chats(args) do
+    directory_key = Keyword.get(args, :directory_key, :chat_directory)
+    base_directory = Application.fetch_env!(:bdfr_browser, directory_key)
+
+    new_chats =
+      for chat <- list_folders([{:ext, ".json"} | args]) do
+        file_path = Path.join([base_directory, chat])
+        parsed = file_path |> File.read!() |> Jason.decode!()
+        Map.put(parsed, "filename", chat)
+      end
+
+    old_chats =
+      for chat <- list_folders([{:ext, ".json_lines"} | args]) do
+        file_path = Path.join([base_directory, chat])
+
+        messages =
+          file_path
+          |> File.stream!()
+          |> Stream.map(&String.trim/1)
+          # Skip blank lines so Jason.decode/1 is never handed an empty string.
+          |> Stream.reject(&(&1 == ""))
+          |> Stream.map(fn line ->
+            {:ok, [author, date, message]} = Jason.decode(line)
+            formatted_date = date |> String.replace(" UTC", "Z") |> String.replace(" ", "T")
+
+            %{
+              "author" => author,
+              "timestamp" => formatted_date,
+              "content" => %{
+                "Message" => message
+              }
+            }
+          end)
+          |> Enum.to_list()
+
+        %{"id" => Path.basename(chat, ".json_lines"), "messages" => messages, "filename" => chat}
+      end
+
+    old_chats ++ new_chats
+  end
+
   defp import_post(post, subreddit) do
     id = post["id"]
 
@@ -142,4 +201,42 @@ defmodule BdfrBrowser.Importer do
 
     [parent] ++ children
   end
+
+  # Upserts the chat row; `accounts` is the unique list of message authors.
+  defp import_chat(chat) do
+    id = chat["id"]
+    accounts = for message <- chat["messages"], uniq: true, do: message["author"]
+
+    %Chat{
+      id: id,
+      accounts: accounts
+    }
+    |> Repo.insert(
+      # Refresh the participant list on re-import instead of a no-op update.
+      on_conflict: {:replace, [:accounts]},
+      conflict_target: :id
+    )
+  end
+
+  # Upserts one message; the id is a SHA3-256 digest of chat id and timestamp.
+  defp import_message(message, chat) do
+    id = :sha3_256 |> :crypto.hash([chat.id, message["timestamp"]]) |> Base.encode16(case: :lower)
+    # The offset is ignored: from_iso8601/1 already returns the datetime in UTC.
+    {:ok, posted_at, _offset} = DateTime.from_iso8601(message["timestamp"])
+
+    {:ok, message} =
+      %Message{
+        id: id,
+        author: message["author"],
+        message: message["content"]["Message"],
+        posted_at: posted_at,
+        chat: chat
+      }
+      |> Repo.insert(
+        on_conflict: [set: [id: id]],
+        conflict_target: :id
+      )
+
+    message
+  end
 end
diff --git a/lib/bdfr_browser/message.ex b/lib/bdfr_browser/message.ex
new file mode 100644
index 0000000..326a7bc
--- /dev/null
+++ b/lib/bdfr_browser/message.ex
@@ -0,0 +1,25 @@
+defmodule BdfrBrowser.Message do
+  use Ecto.Schema
+
+  import Ecto.Query, only: [from: 2]
+
+  alias BdfrBrowser.Chat
+
+  @primary_key {:id, :string, autogenerate: false}
+  @foreign_key_type :string
+
+  schema "messages" do
+    field :author, :string
+    field :message, :string
+    field :posted_at, :utc_datetime
+
+    belongs_to :chat, Chat
+  end
+
+  def listing(chat) do
+    from(m in __MODULE__,
+      where: m.chat_id == ^chat.id,
+      order_by: [asc: m.posted_at]
+    )
+  end
+end
diff --git a/priv/repo/migrations/20230814110852_create_chats.exs b/priv/repo/migrations/20230814110852_create_chats.exs
new file mode 100644
index 0000000..414f56d
--- /dev/null
+++ b/priv/repo/migrations/20230814110852_create_chats.exs
@@ -0,0 +1,21 @@
+defmodule BdfrBrowser.Repo.Migrations.CreateChats do
+  use Ecto.Migration
+
+  def change do
+    create table(:chats, primary_key: false) do
+      add :id, :string, primary_key: true, size: 1024
+      add :accounts, {:array, :string}
+    end
+
+    create table(:messages, primary_key: false) do
+      add :id, :string, primary_key: true, size: 256
+      add :author, :string
+      add :message, :text
+      add :posted_at, :utc_datetime
+
+      add :chat_id, references(:chats, type: :string)
+    end
+
+    create index("messages", :chat_id)
+  end
+end
diff --git a/priv/templates/http/chat.eex b/priv/templates/http/chat.eex
new file mode 100644
index 0000000..44c84e0
--- /dev/null
+++ b/priv/templates/http/chat.eex
@@ -0,0 +1,18 @@
+

Chats

+ +<%= for message <- messages do %> +
+
+
+
+ <%= Earmark.as_html!(message.message) %> + +
+ <%= message.author %>, + <%= DateTime.to_iso8601(message.posted_at) %> +
+
+
+
+
+<% end %> diff --git a/priv/templates/http/chats.eex b/priv/templates/http/chats.eex new file mode 100644 index 0000000..3c58ab2 --- /dev/null +++ b/priv/templates/http/chats.eex @@ -0,0 +1,16 @@ +

Chats

+ +
+
+ <%= for chat <- chats do %> +
+
+
<%= Enum.join(chat.accounts, ", ") %>
+
+ <%# latest_message is nil for a chat without messages (LEFT JOIN in Chat.listing/0) %><%= chat.num_messages %> message(s) - <%= if chat.latest_message, do: DateTime.to_iso8601(chat.latest_message) %>
+
+
+ <% end %> +
+
diff --git a/priv/templates/http/index.eex b/priv/templates/http/index.eex index 860a3ae..a5836f3 100644 --- a/priv/templates/http/index.eex +++ b/priv/templates/http/index.eex @@ -2,6 +2,8 @@
+ Chats + <%= for subreddit <- subreddits do %> <%= subreddit %> <% end %>