cjerzak commited on
Commit
f193599
·
verified ·
1 Parent(s): 78e95ab

Update app.R

Browse files
Files changed (1) hide show
  1. app.R +89 -743
app.R CHANGED
@@ -1,743 +1,89 @@
1
- # app.R — OrgEmbed: Generate Organizational Name Embeddings with LinkOrgs
2
- # ------------------------------------------------------------------------------
3
- # Features
4
- # - CSV upload or text paste, with column auto-detect + explicit selector
5
- # - Validations & friendly notifications
6
- # - One-click embedding generation (ML backend, v4 by default)
7
- # - Real-time progress milestones (+ large-input confirmation)
8
- # - Summary card + DT preview
9
- # - Clean CSV export of embeddings
10
- # - Basic embedding statistics (incl. PCA variance explained)
11
- # - Modern, accessible UI (bslib theme), tooltips, and help modal
12
- #
13
- # Assumptions
14
- # - Internet required on first ML backend setup/download.
15
- # - LinkOrgs installed & accessible; otherwise the app guides you to install.
16
- #
17
- # Best Practices
18
- # - Reactive parsing and caching
19
- # - Robust tryCatch blocks with console logging
20
- # - Clear separation of concerns with small helpers
21
- # ------------------------------------------------------------------------------
22
-
23
- options(shiny.maxRequestSize = 50 * 1024^2) # accept CSVs up to ~X MB
24
- options(error = NULL)
25
-
26
- suppressPackageStartupMessages({
27
- library(shiny)
28
- library(bslib)
29
- library(DT)
30
- library(shinyWidgets)
31
- })
32
-
33
- # conda configuration for cross-platform Docker environments
34
- configure_python <- function() {
35
- cat("[conda config] Starting Python configuration...\n")
36
-
37
- # Define potential conda locations for different platforms/Docker images
38
- conda_paths <- c(
39
- Sys.getenv("CONDA_EXE"), # User-defined
40
- Sys.which("conda"), # System PATH
41
- "/opt/conda/bin/conda", # Common Docker location
42
- "/usr/local/bin/conda", # Alternative location
43
- "/home/user/miniconda3/bin/conda", # User miniconda
44
- "/root/miniconda3/bin/conda", # Root miniconda
45
- "/usr/bin/conda", # System conda
46
- file.path(Sys.getenv("HOME"), "miniconda3", "bin", "conda"), # Home miniconda
47
- file.path(Sys.getenv("HOME"), "anaconda3", "bin", "conda") # Home anaconda
48
- )
49
-
50
- # Remove empty strings and find first existing conda
51
- conda_paths <- conda_paths[nzchar(conda_paths)]
52
- conda_bin <- Find(file.exists, conda_paths)
53
-
54
- python_configured <- FALSE
55
- config_method <- "none"
56
-
57
- # Method 1: Try conda environment
58
- if (!is.null(conda_bin) && file.exists(conda_bin)) {
59
- cat("[conda config] Found conda at:", conda_bin, "\n")
60
- tryCatch({
61
- reticulate::use_condaenv("LinkOrgs_env", conda = conda_bin, required = TRUE)
62
- python_configured <- TRUE
63
- config_method <- "conda_env"
64
- cat("[reticulate] Successfully configured conda environment 'LinkOrgs_env'\n")
65
- }, error = function(e) {
66
- cat("[conda config] Failed to use conda env 'LinkOrgs_env':", conditionMessage(e), "\n")
67
- })
68
- } else {
69
- cat("[conda config] No conda binary found in expected locations\n")
70
- }
71
-
72
- # Method 2: Try RETICULATE_PYTHON environment variable
73
- if (!python_configured && nzchar(Sys.getenv("RETICULATE_PYTHON"))) {
74
- python_path <- Sys.getenv("RETICULATE_PYTHON")
75
- cat("[conda config] Trying RETICULATE_PYTHON:", python_path, "\n")
76
- tryCatch({
77
- reticulate::use_python(python_path, required = TRUE)
78
- python_configured <- TRUE
79
- config_method <- "env_var"
80
- cat("[conda config] Successfully configured Python from RETICULATE_PYTHON\n")
81
- }, error = function(e) {
82
- cat("[conda config] Failed to use RETICULATE_PYTHON:", conditionMessage(e), "\n")
83
- })
84
- }
85
-
86
- # Method 3: Try default conda base environment as fallback
87
- if (!python_configured && !is.null(conda_bin)) {
88
- cat("[conda config] Trying default conda base environment...\n")
89
- tryCatch({
90
- reticulate::use_condaenv("base", conda = conda_bin, required = FALSE)
91
- python_configured <- TRUE
92
- config_method <- "conda_base"
93
- cat("[conda config] Successfully configured conda base environment\n")
94
- }, error = function(e) {
95
- cat("[conda config] Failed to use conda base environment:", conditionMessage(e), "\n")
96
- })
97
- }
98
-
99
- # Method 4: Let reticulate auto-discover
100
- if (!python_configured) {
101
- cat("[conda config] Falling back to reticulate auto-discovery...\n")
102
- tryCatch({
103
- # Force reticulate to initialize
104
- reticulate::py_config()
105
- python_configured <- TRUE
106
- config_method <- "auto_discovery"
107
- cat("[conda config] Successfully auto-discovered Python\n")
108
- }, error = function(e) {
109
- cat("[conda config] Auto-discovery failed:", conditionMessage(e), "\n")
110
- })
111
- }
112
-
113
- # Always attempt to log final configuration (this runs no matter what)
114
- tryCatch({
115
- config <- reticulate::py_config()
116
- cat("[conda config] FINAL CONFIG:\n")
117
- cat("[conda config] Method:", config_method, "\n")
118
- cat("[conda config] Python:", config$python, "\n")
119
- cat("[conda config] Version:", config$version, "\n")
120
- cat("[conda config] NumPy:", config$numpy, "\n")
121
- if (!is.null(conda_bin)) {
122
- cat("[reticulate] Conda:", conda_bin, "\n")
123
- }
124
- }, error = function(e) {
125
- cat("[conda config] ERROR: Could not retrieve Python configuration:", conditionMessage(e), "\n")
126
- cat("[conda config] Configuration method attempted:", config_method, "\n")
127
- })
128
-
129
- return(python_configured)
130
- }
131
-
132
- # Execute the configuration
133
- configure_python()
134
-
135
- # Optional tooltips (if bsplus is available)
136
- has_bsplus <- requireNamespace("bsplus", quietly = TRUE)
137
- if (has_bsplus) {
138
- bs_tooltip <- function(id, title) bsplus::shinyInput_label_embed(id) %>%
139
- bsplus::bs_embed_tooltip(title, placement = "right")
140
- } else {
141
- bs_tooltip <- function(id, title) NULL
142
- }
143
-
144
- #--- Helpers -------------------------------------------------------------------
145
-
146
- # Nicely guess the name column from a data.frame
147
- guess_name_col <- function(df) {
148
- nms <- tolower(names(df))
149
- patterns <- c("^names?$", "^orgnames?$", "organization", "^org$", "company", "entity", "name")
150
- cand <- unique(unlist(lapply(patterns, function(p) which(grepl(p, nms)))))
151
- if (length(cand) >= 1) names(df)[cand[1]] else names(df)[1]
152
- }
153
-
154
- # Parse pasted text into a data.frame with one "names" column
155
- parse_pasted_names <- function(txt) {
156
- lines <- unlist(strsplit(txt, "\n", fixed = TRUE))
157
- lines <- trimws(lines)
158
- lines <- lines[nzchar(lines)]
159
- if (length(lines) == 0) return(data.frame(names = character(0)))
160
- data.frame(names = lines, stringsAsFactors = FALSE)
161
- }
162
-
163
- # Rename embedding columns to emb_001, emb_002, ...
164
- rename_embed_cols <- function(df, name_col) {
165
- embed_cols <- setdiff(names(df), name_col)
166
- if (length(embed_cols) == 0) return(df)
167
- new_names <- sprintf("emb_%03d", seq_along(embed_cols))
168
- names(df)[match(embed_cols, names(df))] <- new_names
169
- df
170
- }
171
-
172
- # Extract only the numeric embedding matrix from a final result
173
- only_embedding_matrix <- function(final_df) {
174
- is_num <- vapply(final_df, is.numeric, logical(1))
175
- final_df[, is_num, drop = FALSE]
176
- }
177
-
178
- # Safe notification wrapper
179
- notify <- function(txt, type = "message", duration = 5) {
180
- shiny::showNotification(txt, type = type, duration = duration)
181
- }
182
-
183
- #--- UI ------------------------------------------------------------------------
184
-
185
- theme <- bs_theme(bootswatch = "flatly")
186
-
187
- ui <- page_sidebar(
188
- title = div(
189
- tags$a(
190
- "OrgEmbed:",
191
- href = "https://huggingface.co/spaces/cjerzak/LinkOrgs_Online",
192
- target = "_blank",
193
- rel = "noopener noreferrer",
194
- style = "font-weight:700; text-decoration:none; color:inherit;",
195
- id = "orgembed_link",
196
- title = "Open the LinkOrgs Space in a new tab"
197
- ),
198
- span(" Generate Organizational Name Embeddings Using ",
199
- tags$a("LinkOrgs",
200
- href = "https://github.com/cjerzak/LinkOrgs-software",
201
- target = "_blank",
202
- style = "color: inherit; text-decoration: underline;"),
203
- style = "color: #D3D3D3;")
204
- ),
205
- theme = theme,
206
-
207
- sidebar = sidebar(
208
- width = 360,
209
- tags$style(HTML("
210
- .sidebar .shiny-input-container { margin-bottom: 12px; }
211
- .small-note { font-size: 0.9rem; color: #666; }
212
- .tight { margin-top: -6px; }
213
- ")),
214
-
215
- # Input mode
216
- radioButtons(
217
- "input_mode", "Input method",
218
- choices = c("CSV upload" = "csv", "Text paste" = "text"),
219
- selected = "csv", inline = TRUE
220
- ),
221
- #bs_tooltip("input_mode", "Choose how you want to provide names"),
222
-
223
- # CSV upload controls
224
- conditionalPanel(
225
- "input.input_mode == 'csv'",
226
- fileInput("file_csv", "Upload CSV", accept = ".csv", multiple = FALSE),
227
- uiOutput("col_select_ui"),
228
- div(class = "small-note tight",
229
- "Tip: We guess the organization name column but let you override it.")
230
- ),
231
-
232
- # Text paste controls
233
- conditionalPanel(
234
- "input.input_mode == 'text'",
235
- textAreaInput(
236
- "text_names", "Paste one name per line", rows = 6, placeholder = "Apple Inc.\nAlphabet\nMicrosoft"
237
- ),
238
- actionLink("load_examples", "Load examples"),
239
- #bs_tooltip("text_names", "One organization per line. Empty lines are ignored.")
240
- ),
241
-
242
- hr(),
243
-
244
- # Advanced options
245
- numericInput("max_rows", "Max rows to process", value = 5000, min = 100, step = 100),
246
- checkboxInput("include_names", "Include original names/columns in output", value = TRUE),
247
- selectInput("ml_version", "ML model version", choices = c("v1", "v2", "v3", "v4"), selected = "v4"),
248
-
249
- hr(),
250
-
251
- # Main action
252
- actionButton("process", "Process Names", class = "btn-primary", icon = icon("play")),
253
- helpText("Large inputs (> 1000 rows) will prompt for confirmation."),
254
-
255
- hr(),
256
-
257
- # Visible warning for users
258
- div(class = "alert alert-warning", style = "margin-top:8px; padding:8px;",
259
- strong("Warning: "), "Do not navigate away from page while computing embeddings! May take 10 mins to compile neural nets."
260
- ),
261
-
262
- # External help link (opens in new tab)
263
- # External help link (opens in new tab)
264
- tags$a(
265
- id = "open_help_link",
266
- href = "https://connorjerzak.com/linkorgs-summary/",
267
- target = "_blank",
268
- rel = "noopener",
269
- icon("circle-question"),
270
- " Technical details."
271
- ),
272
- tags$span(
273
- "Citation: Libgober, B., & Jerzak, C. T. (2024). Linking datasets on organizations using half a billion open-collaborated records. ",
274
- tags$i("Political Science Research and Methods. "),
275
- tags$a(
276
- href = "https://doi.org/10.1017/psrm.2024.55",
277
- target = "_blank",
278
- rel = "noopener",
279
- "https://doi.org/10.1017/psrm.2024.55"
280
- ),
281
- #". ",
282
- tags$a(
283
- href = "https://connorjerzak.com/wp-content/uploads/2024/07/LinkOrgsBib.txt",
284
- target = "_blank",
285
- rel = "noopener",
286
- " [.bib]"
287
- )
288
- ),
289
- ),
290
- # Main body
291
- layout_columns(
292
- col_widths = c(12),
293
- # Input Preview Card
294
- card(
295
- header = "1) Preview input",
296
- card_body(
297
- div(class = "small-note", "Shows up to the first 10 rows by default."),
298
- fluidRow(
299
- column(
300
- width = 4,
301
- prettySwitch("show_all_preview", "Show full table", value = FALSE)
302
- ),
303
- column(
304
- width = 4,
305
- actionButton("refresh_preview", "Refresh preview", icon = icon("arrows-rotate"))
306
- )
307
- ),
308
- DTOutput("input_preview")
309
- )
310
- ),
311
-
312
- # Embedding Generation & Summary Card
313
- card(
314
- header = "2) Generate embeddings",
315
- card_body(
316
- # Summary (appears after success)
317
- uiOutput("summary_card"),
318
- br(),
319
- conditionalPanel(
320
- "output.has_embeddings == true",
321
- strong("Embeddings preview"),
322
- div(class = "small-note tight", "First 5 rows; download the full CSV below."),
323
- DTOutput("emb_preview"),
324
- br(),
325
- downloadButton("download_embeddings", "Download Embeddings CSV", class = "btn-success")
326
- )
327
- )
328
- ),
329
-
330
- # Analysis Card
331
- conditionalPanel(
332
- "output.has_embeddings == true",
333
- card(
334
- header = "3) Embedding Summary",
335
- card_body(
336
- div(class = "small-note",
337
- "Some statistics and PCA variance explained."),
338
- uiOutput("stats_display")
339
- )
340
- )
341
- )
342
- )
343
- )
344
-
345
- #--- Server --------------------------------------------------------------------
346
-
347
- server <- function(input, output, session) {
348
-
349
- # State ----------------------------------------------------------------------
350
- backend_ready <- reactiveVal(FALSE)
351
- embeddings_df <- reactiveVal(NULL) # final data.frame (original + embeddings)
352
- pca_2d <- reactiveVal(NULL) # data.frame with 2D PCA
353
- pca_10d <- reactiveVal(NULL) # data.frame with 10D PCA
354
- pending_df <- reactiveVal(NULL) # for large dataset confirmation
355
- large_threshold <- 1000
356
-
357
- # Help modal -----------------------------------------------------------------
358
- observeEvent(input$open_help_link, ignoreInit = TRUE, {
359
- showModal(modalDialog(
360
- title = "How to use OrgEmbed",
361
- easyClose = TRUE, size = "l",
362
- tagList(
363
- tags$ol(
364
- tags$li("Choose an input method: upload a CSV or paste names."),
365
- tags$li("For CSV, confirm/select the column that contains organization names."),
366
- tags$li("Click ", tags$strong("Process Names"), " to generate embeddings."),
367
- tags$li("After completion, inspect the preview and click ",
368
- tags$strong("Download Embeddings CSV"), " to export."),
369
- tags$li("Optionally, use PCA to reduce to 2 or 10 dimensions and download.")
370
- ),
371
- tags$hr(),
372
- tags$p(class = "small-note",
373
- "First-time ML backend setup needs internet to download model files.")
374
- )
375
- ))
376
- })
377
-
378
- # Fill examples for text paste -----------------------------------------------
379
- observeEvent(input$load_examples, {
380
- updateTextAreaInput(session, "text_names", value = "Google\nAlphabet Inc.\nMicrosoft\nMeta Platforms\nOpenAI")
381
- })
382
-
383
- # Reactive: parse CSV or text input ------------------------------------------
384
- raw_input <- reactive({
385
- mode <- input$input_mode
386
- if (identical(mode, "csv")) {
387
- req(input$file_csv)
388
- df <- tryCatch(
389
- read.csv(input$file_csv$datapath, stringsAsFactors = FALSE, check.names = FALSE),
390
- error = function(e) {
391
- cat("[CSV read error] ", conditionMessage(e), "\n")
392
- notify("Could not read CSV. Ensure it's a valid .csv file.", "error", 7)
393
- NULL
394
- }
395
- )
396
- validate(need(!is.null(df) && nrow(df) > 0, "Uploaded CSV appears empty."))
397
- df
398
- } else {
399
- validate(need(nzchar(input$text_names), "Please paste at least one name."))
400
- parse_pasted_names(input$text_names)
401
- }
402
- })
403
-
404
- # Update/select name column after CSV upload
405
- observeEvent(raw_input(), {
406
- if (identical(input$input_mode, "csv")) {
407
- df <- raw_input()
408
- guessed <- guess_name_col(df)
409
- updateSelectInput(session, "col_select", choices = names(df), selected = guessed)
410
- }
411
- })
412
-
413
- # UI for selecting names column (CSV)
414
- output$col_select_ui <- renderUI({
415
- req(input$input_mode == "csv", raw_input())
416
- selectInput("col_select", "Names column", choices = names(raw_input()))
417
- })
418
-
419
- # Input preview (first 10 rows or full) --------------------------------------
420
- preview_data <- reactive({
421
- df <- raw_input()
422
- if (identical(input$input_mode, "csv")) {
423
- # show selected column + keep other cols for context
424
- # Nothing to subset here; selection is shown but preview shows all cols
425
- } else {
426
- # text mode ensures column named "names"
427
- }
428
- df
429
- })
430
-
431
- observeEvent(input$refresh_preview, {
432
- # No-op: triggers re-run of preview_data by invalidating reactives
433
- invisible(TRUE)
434
- })
435
-
436
- output$input_preview <- renderDT({
437
- df <- preview_data()
438
- req(df)
439
- to_show <- if (isTRUE(input$show_all_preview)) df else head(df, 10)
440
- datatable(
441
- to_show,
442
- options = list(pageLength = 10, scrollX = TRUE, dom = 'tip'),
443
- rownames = FALSE
444
- )
445
- })
446
-
447
- # Large dataset confirmation flow --------------------------------------------
448
- proceed_with_large <- function(df) {
449
- pending_df(df)
450
- showModal(modalDialog(
451
- title = "Large dataset detected",
452
- "You are about to process ", tags$b(nrow(df)), " rows.",
453
- tags$p("This may take 1–5 minutes depending on your hardware and network. Proceed?"),
454
- footer = tagList(
455
- actionButton("confirm_large", "Proceed", class = "btn-primary"),
456
- modalButton("Cancel")
457
- ),
458
- easyClose = TRUE
459
- ))
460
- }
461
-
462
- observeEvent(input$confirm_large, {
463
- df <- pending_df()
464
- removeModal()
465
- if (!is.null(df)) isolate(run_embeddings(df))
466
- pending_df(NULL)
467
- })
468
-
469
- # Core: run embeddings --------------------------------------------------------
470
- run_embeddings <- function(df) {
471
- req(nrow(df) > 0)
472
-
473
- # Determine the names column
474
- by_col <- if (identical(input$input_mode, "csv")) {
475
- req(input$col_select %in% names(df))
476
- input$col_select
477
- } else {
478
- # text mode
479
- "names"
480
- }
481
-
482
- # Validate names column non-empty
483
- validate(need(any(nzchar(trimws(df[[by_col]]))), "Please provide at least one valid name."))
484
- # Enforce max rows
485
- if (nrow(df) > input$max_rows) {
486
- notify(sprintf("Input truncated to max_rows = %d.", input$max_rows), "warning", 6)
487
- df <- df[seq_len(input$max_rows), , drop = FALSE]
488
- }
489
-
490
- withProgress(message = "Generating embeddings...", value = 0, {
491
- incProgress(0.10, detail = "Parsing input...")
492
- # Defensive copy and clean names
493
- df[[by_col]] <- trimws(df[[by_col]])
494
- df <- df[nzchar(df[[by_col]]), , drop = FALSE]
495
- validate(need(nrow(df) > 0, "Please provide at least one valid name."))
496
-
497
- incProgress(0.20, detail = "Initializing model...")
498
-
499
- incProgress(0.50, detail = "Calling LinkOrgs (ML embeddings)...")
500
- if (!requireNamespace("LinkOrgs", quietly = TRUE)) {
501
- notify("Package 'LinkOrgs' not installed. See README to install.", "error", 10)
502
- return(invisible(NULL))
503
- }
504
-
505
- # Main call: ExportEmbeddingsOnly = TRUE
506
- rep_x <- NULL
507
- err <- NULL
508
- pdf(NULL) # Open null device to discard plots
509
- tryCatch({
510
- rep_x <- LinkOrgs::LinkOrgs(
511
- x = df, y = NULL,
512
- by.x = by_col,
513
- algorithm = "ml",
514
- ml_version = input$ml_version,
515
- ExportEmbeddingsOnly = TRUE
516
- )
517
- }, error = function(e) {
518
- err <<- e
519
- })
520
- dev.off() # Clean up the null device
521
-
522
- if (!is.null(err)) {
523
- cat("[LinkOrgs error] ", conditionMessage(err), "\n")
524
- notify("Embedding generation failed. Backend setup may be incomplete. Check internet/conda and retry.", "error", 10)
525
- return(invisible(NULL))
526
- }
527
-
528
- incProgress(0.80, detail = "Post-processing embeddings...")
529
-
530
- # rep_x$embedx is a data.frame with first column = by_col and remaining = embeddings
531
- embed_df <- rep_x$embedx
532
- # standardize embedding column names
533
- embed_df <- rename_embed_cols(embed_df, name_col = by_col)
534
-
535
- # Compose final output
536
- final <- if (isTRUE(input$include_names)) {
537
- # Bind to original df, avoiding duplicated name col
538
- cols_to_add <- setdiff(names(embed_df), by_col)
539
- cbind(df, embed_df[, cols_to_add, drop = FALSE])
540
- } else {
541
- # Return only embeddings (keep name column for context)
542
- embed_df
543
- }
544
-
545
- embeddings_df(final)
546
-
547
- incProgress(1.00, detail = "Complete!")
548
- notify(sprintf("Embeddings generated for %d names.", nrow(final)), "message", 5)
549
- })
550
- }
551
-
552
- # Process button: orchestrate large confirmation + run -----------------------
553
- observeEvent(input$process, {
554
- df <- raw_input()
555
- req(df)
556
-
557
- # Validate CSV names column selection exists
558
- if (identical(input$input_mode, "csv")) {
559
- if (!isTRUE(input$col_select %in% names(df))) {
560
- notify("Invalid names column—please select again.", "error", 7)
561
- return(invisible(NULL))
562
- }
563
- } else {
564
- # Text mode already enforced with req(text != "")
565
- }
566
-
567
- # Large dataset prompt
568
- if (nrow(df) > large_threshold) {
569
- proceed_with_large(df)
570
- return(invisible(NULL))
571
- }
572
-
573
- # Otherwise proceed immediately
574
- run_embeddings(df)
575
- })
576
-
577
- # Summary card ---------------------------------------------------------------
578
- output$summary_card <- renderUI({
579
- final <- embeddings_df()
580
- if (is.null(final)) {
581
- tagList(
582
- div(class = "small-note",
583
- "Click ", tags$strong("Process Names"),
584
- " to start. You'll see progress updates here.")
585
- )
586
- } else {
587
- emb_mat <- only_embedding_matrix(final)
588
- dims <- ncol(emb_mat)
589
- n <- nrow(final)
590
- bslib::card(
591
- bslib::card_body(
592
- HTML(sprintf(
593
- "<h4 style='margin-top:0;'>Embeddings ready</h4>
594
- <p class='small-note' style='margin-bottom:6px;'>
595
- Generated embeddings for <b>%d</b> names.
596
- </p>
597
- <p class='small-note tight'>
598
- Dimensions: <b>%d</b> (columns starting with <code>emb_</code>).
599
- </p>", n, dims
600
- ))
601
- )
602
- )
603
- }
604
- })
605
-
606
- # Flag for conditionalPanel
607
- output$has_embeddings <- reactive({
608
- !is.null(embeddings_df())
609
- })
610
- outputOptions(output, "has_embeddings", suspendWhenHidden = FALSE)
611
-
612
- # Embedding preview (first 5 rows) -------------------------------------------
613
- output$emb_preview <- renderDT({
614
- final <- embeddings_df(); req(final)
615
- to_show <- head(final, 5)
616
- datatable(
617
- to_show,
618
- options = list(pageLength = 5, scrollX = TRUE, dom = 'tip'),
619
- rownames = FALSE
620
- )
621
- })
622
-
623
- # Download full embeddings ----------------------------------------------------
624
- output$download_embeddings <- downloadHandler(
625
- filename = function() "org_embeddings.csv",
626
- content = function(file) {
627
- final <- embeddings_df(); req(final)
628
- write.csv(final, file, row.names = FALSE)
629
- }
630
- )
631
-
632
- # - helpful statistics
633
- embedding_stats <- reactive({
634
- final <- embeddings_df(); req(final)
635
- emb <- only_embedding_matrix(final); req(ncol(emb) >= 1)
636
-
637
- # extra safety: coerce to a numeric matrix in case anything came in as characters
638
- emb <- as.matrix(emb)
639
- mode(emb) <- "numeric"
640
-
641
- pc <- prcomp(emb, center = TRUE, scale. = TRUE)
642
- var_exp <- pc$sdev^2 / sum(pc$sdev^2) * 100
643
- cum_var <- cumsum(var_exp)
644
-
645
- list(
646
- n = nrow(emb),
647
- dims = ncol(emb),
648
- p1 = round(var_exp[1], 1),
649
- p2 = round(cum_var[min(2, length(cum_var))], 1),
650
- p10 = round(cum_var[min(10, length(cum_var))], 1),
651
- p100 = round(cum_var[min(100,length(cum_var))], 1)
652
- )
653
- })
654
-
655
-
656
- # add to output
657
- output$stats_display <- renderUI({
658
- stats <- embedding_stats()
659
-
660
- # Compose terminal text
661
- txt <- paste0(
662
- "orgembed@localhost:~$ stats\n",
663
- "-------------------------------------\n",
664
- sprintf("rows (n) : %s\n", format(stats$n, big.mark = ",")),
665
- sprintf("dims (d) : %s\n", format(stats$dims, big.mark = ",")),
666
- "PCA variance explained:\n",
667
- sprintf(" PC1 : %.1f%%\n", stats$p1),
668
- sprintf(" PC1+2 : %.1f%%\n", stats$p2),
669
- sprintf(" PC1–10 : %.1f%%\n", stats$p10),
670
- sprintf(" PC1–100 : %.1f%%\n", stats$p100)
671
- )
672
-
673
- # Safely escape for JS
674
- txt_js <- gsub("\\\\", "\\\\\\\\", txt)
675
- txt_js <- gsub("'", "\\\\'", txt_js)
676
- txt_js <- gsub("\n", "\\\\n", txt_js)
677
-
678
- tagList(
679
- # Inline CSS for terminal aesthetics
680
- tags$style(HTML("
681
- .terminal-box {
682
- background: #0b0f12;
683
- color: #b6ffb3;
684
- font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, 'Liberation Mono', 'Courier New', monospace;
685
- font-size: 0.95rem;
686
- padding: 16px 18px;
687
- border-radius: 8px;
688
- border: 1px solid rgba(0,255,100,0.25);
689
- box-shadow: inset 0 0 0 1px rgba(0,255,100,0.05), 0 10px 24px rgba(0,0,0,0.25);
690
- position: relative;
691
- overflow: hidden;
692
- }
693
- .terminal-pre { white-space: pre-wrap; margin: 0; line-height: 1.35; }
694
- .terminal-title { opacity: 0.95; margin-bottom: 8px; }
695
- .terminal-cursor {
696
- display: inline-block; width: 0.6ch; height: 1em; margin-left: 4px;
697
- background: #b6ffb3; animation: blink 1s steps(1) infinite;
698
- vertical-align: -0.2em;
699
- }
700
- @keyframes blink { 50% { opacity: 0; } }
701
- .terminal-scanlines:before {
702
- content: ''; position: absolute; inset: 0; pointer-events: none;
703
- background: linear-gradient(rgba(255,255,255,0.03) 50%, transparent 0);
704
- background-size: 100% 3px; mix-blend-mode: overlay;
705
- }
706
- ")),
707
-
708
- # Terminal container
709
- div(class = "terminal-box terminal-scanlines",
710
- tags$div(
711
- class = "terminal-title",
712
- HTML("orgembed<span style='color:#66ff66'>@</span>localhost:~$ <span style='opacity:.8'>stats</span>")
713
- ),
714
- tags$pre(id = "term_stats", class = "terminal-pre"),
715
- tags$span(class = "terminal-cursor")
716
- ),
717
-
718
- # Type-out effect
719
- tags$script(HTML(sprintf("
720
- (function(){
721
- var el = document.getElementById('term_stats');
722
- if (!el) return;
723
- var text = '%s';
724
- el.textContent = '';
725
- var i = 0;
726
- var speed = 8; // ms per character
727
- (function type(){
728
- if (i < text.length) {
729
- el.textContent += text.charAt(i++);
730
- setTimeout(type, speed);
731
- }
732
- })();
733
- })();
734
- ", txt_js)))
735
- )
736
- })
737
-
738
-
739
- }
740
-
741
- # Run --------------------------------------------------------------------------
742
- shinyApp(ui, server)
743
-
 
1
+ output$stats_display <- renderUI({
2
+ stats <- embedding_stats()
3
+
4
+ # Terminal-style HTML with monospace font and terminal aesthetics
5
+ terminal_output <- HTML(paste0(
6
+ '<div style="
7
+ background-color: #0c0c0c;
8
+ color: #00ff00;
9
+ font-family: \'Courier New\', monospace;
10
+ padding: 20px;
11
+ border-radius: 5px;
12
+ border: 2px solid #333;
13
+ box-shadow: 0 0 10px rgba(0, 255, 0, 0.1);
14
+ font-size: 14px;
15
+ line-height: 1.6;
16
+ ">',
17
+ '<div style="color: #888; margin-bottom: 10px;">$ linkorg_stats --summary</div>',
18
+ '<div style="border-bottom: 1px solid #333; margin-bottom: 15px; padding-bottom: 10px;">',
19
+ '<span style="color: #00ff00;">═══════════════════════════════════════════════════════</span><br/>',
20
+ '<span style="color: #00ff00;"> EMBEDDING SUMMARY STATISTICS</span><br/>',
21
+ '<span style="color: #00ff00;">═══════════════════════════════════════════════════════</span>',
22
+ '</div>',
23
+
24
+ '<div style="margin-bottom: 8px;">',
25
+ '<span style="color: #888;">[INFO]</span> ',
26
+ '<span style="color: #fff;">Processing complete at:</span> ',
27
+ '<span style="color: #0099ff;">', format(Sys.time(), "%Y-%m-%d %H:%M:%S %Z"), '</span>',
28
+ '</div>',
29
+
30
+ '<div style="margin-bottom: 8px;">',
31
+ '<span style="color: #888;">[DATA]</span> ',
32
+ '<span style="color: #fff;">Total embeddings generated:</span> ',
33
+ '<span style="color: #ffff00; font-weight: bold;">', stats$n, '</span>',
34
+ '</div>',
35
+
36
+ '<div style="margin-bottom: 8px;">',
37
+ '<span style="color: #888;">[DATA]</span> ',
38
+ '<span style="color: #fff;">Embedding dimensions:</span> ',
39
+ '<span style="color: #ffff00; font-weight: bold;">', stats$dims, '</span>',
40
+ '</div>',
41
+
42
+ '<div style="margin-top: 15px; border-top: 1px solid #333; padding-top: 15px;">',
43
+ '<div style="color: #00ff00; margin-bottom: 10px;">▶ Principal Component Analysis Results:</div>',
44
+ '</div>',
45
+
46
+ '<div style="margin-left: 20px; margin-bottom: 8px;">',
47
+ '<span style="color: #888;">├─</span> ',
48
+ '<span style="color: #fff;">PC1 variance explained:</span> ',
49
+ '<span style="color: #00ff00;">', sprintf("%.1f%%", stats$p1), '</span>',
50
+ '</div>',
51
+
52
+ '<div style="margin-left: 20px; margin-bottom: 8px;">',
53
+ '<span style="color: #888;">├─</span> ',
54
+ '<span style="color: #fff;">PC1-2 cumulative variance:</span> ',
55
+ '<span style="color: #00ff00;">', sprintf("%.1f%%", stats$p2), '</span>',
56
+ '</div>',
57
+
58
+ '<div style="margin-left: 20px; margin-bottom: 8px;">',
59
+ '<span style="color: #888;">├─</span> ',
60
+ '<span style="color: #fff;">PC1-10 cumulative variance:</span> ',
61
+ '<span style="color: #00ff00;">', sprintf("%.1f%%", stats$p10), '</span>',
62
+ '</div>',
63
+
64
+ '<div style="margin-left: 20px; margin-bottom: 8px;">',
65
+ '<span style="color: #888;">└─</span> ',
66
+ '<span style="color: #fff;">PC1-100 cumulative variance:</span> ',
67
+ '<span style="color: #00ff00;">', sprintf("%.1f%%", stats$p100), '</span>',
68
+ '</div>',
69
+
70
+ '<div style="margin-top: 15px; padding-top: 10px; border-top: 1px solid #333;">',
71
+ '<span style="color: #888;">[STATUS]</span> ',
72
+ '<span style="color: #00ff00;">✓ Analysis complete</span>',
73
+ '</div>',
74
+
75
+ '<div style="margin-top: 8px;">',
76
+ '<span style="color: #888;">$ <span style="animation: blink 1s infinite;">_</span></span>',
77
+ '</div>',
78
+
79
+ '<style>',
80
+ '@keyframes blink {',
81
+ ' 0%, 50% { opacity: 1; }',
82
+ ' 51%, 100% { opacity: 0; }',
83
+ '}',
84
+ '</style>',
85
+ '</div>'
86
+ ))
87
+
88
+ terminal_output
89
+ })