[Privoxy-devel] 0007-Create-a-fast-CGI-function

Lee ler762 at protonmail.com
Thu Aug 3 17:45:24 CEST 2023


Empty Message
-------------- next part --------------
From b745be95e2df609729b1b9cb08c1bd8f222e03d7 Mon Sep 17 00:00:00 2001
From: Lee <ler762 at users.sourceforge.net>
Date: Tue, 1 Aug 2023 02:29:16 -0400
Subject: [PATCH 7/9] Create a fast CGI function to determine if a URL will be
 blocked.

Calling cgi_show_url_info is slow.

If it's a person doing the call from a browser, a cgi processing time of tens
of milliseconds isn't going to make much difference.

But if it's a program processing a "is this url blocked or no" file of over
100K lines, an extra 2ms per call adds up to over 200 seconds.

So create a new cgi_show_url_final_info function that
- keeps a short in-memory template instead of reading it in from the disk
- returns just the "final info" instead of the intermediate results and then
  the final info.
- skips processing of multi-actions (things that can be applied multiple
  times like filter{}).  Hence the new merge_single_actions function.
---
 actions.c   |  48 +++++++++++
 actions.h   |   2 +
 cgi.c       |   4 +
 cgisimple.c | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 cgisimple.h |   3 +
 5 files changed, 293 insertions(+)

diff --git a/actions.c b/actions.c
index 379c5e97..c181ca72 100644
--- a/actions.c
+++ b/actions.c
@@ -784,6 +784,54 @@ jb_err merge_current_action (struct current_action_spec *dest,
 }
 
 
+/*********************************************************************
+ *
+ * Function    :  merge_single_actions
+ *
+ * Description :  Merge an action into the current actions, similar to
+ *                "dest += src".  Same as merge_current_action()
+ *                except that multi actions (things that can be
+ *                applied multiple times, like filter{}) are skipped.
+ *                Only the flags and the single-valued string
+ *                parameters are merged.
+ *
+ *                Strings taken from "src" are duplicated with
+ *                strdup_or_die(), so "dest" owns its copies and
+ *                should be released with "free_current_action".
+ *                There are no mask or remove lists in dest.
+ *                (If we're applying it to a URL, we don't need them)
+ *
+ * Parameters  :
+ *          1  :  dest = Current actions, to modify.
+ *          2  :  src = Action to add.
+ *
+ * Returns     :  JB_ERR_OK.  The jb_err return type mirrors
+ *                merge_current_action() so callers can swap the two.
+ *
+ *********************************************************************/
+jb_err merge_single_actions (struct current_action_spec *dest,
+                             const struct action_spec *src)
+{
+   int i;
+
+   /* Clear the flags masked out by src, then set the ones it adds. */
+   dest->flags  &= src->mask;
+   dest->flags  |= src->add;
+
+   for (i = 0; i < ACTION_STRING_COUNT; i++)
+   {
+      if (src->string[i] != NULL)
+      {
+         /* Copy first, then release the value dest held before. */
+         char *copy = strdup_or_die(src->string[i]);
+         freez(dest->string[i]);
+         dest->string[i] = copy;
+      }
+   }
+   return JB_ERR_OK;
+}
+
+
 /*********************************************************************
  *
  * Function    :  update_action_bits_for_tag
diff --git a/actions.h b/actions.h
index 618bc255..a797b98a 100644
--- a/actions.h
+++ b/actions.h
@@ -70,6 +70,8 @@ extern void init_current_action     (struct current_action_spec *dest);
 extern void free_current_action     (struct current_action_spec *src);
 extern jb_err merge_current_action  (struct current_action_spec *dest,
                                      const struct action_spec *src);
+extern jb_err merge_single_actions  (struct current_action_spec *dest,
+                                     const struct action_spec *src);
 extern char * current_action_to_html(const struct client_state *csp,
                                      const struct current_action_spec *action);
 extern char * actions_to_line_of_text(const struct current_action_spec *action);
diff --git a/cgi.c b/cgi.c
index d60166f2..c067f696 100644
--- a/cgi.c
+++ b/cgi.c
@@ -117,6 +117,10 @@ static const struct cgi_dispatcher cgi_dispatchers[] = {
          cgi_show_url_info,
          "Look up which actions apply to a URL and why",
          TRUE },
+   { "show-url-final-info",
+         cgi_show_url_final_info,
+         "Look up the final actions that apply to a URL",
+         TRUE },
 #ifdef FEATURE_TOGGLE
    { "toggle",
          cgi_toggle,
diff --git a/cgisimple.c b/cgisimple.c
index 961510f2..99a352cb 100644
--- a/cgisimple.c
+++ b/cgisimple.c
@@ -1924,6 +1924,242 @@ jb_err cgi_show_url_info(struct client_state *csp,
 }
 
 
+/*********************************************************************
+ *
+ * Function    :  cgi_show_url_final_info
+ *
+ * Description :  CGI function that shows just the "Final results:"
+ *                section from cgi_show_url_info, rendered from a short
+ *                in-memory template instead of one loaded from disk.
+ *                Multi-actions such as filter{} are not merged.
+ *
+ * Parameters  :
+ *          1  :  csp = Current client state (buffers, headers, etc...)
+ *          2  :  rsp = http_response data structure for output
+ *          3  :  parameters = map of cgi parameters
+ *
+ * CGI Parameters :
+ *            url : The url whose actions are to be determined.
+ *                  If url is unset or empty, the url-given template
+ *                  block is killed so that only the lookup form
+ *                  is shown.
+ *
+ * Returns     :  JB_ERR_OK on success
+ *                JB_ERR_MEMORY on out-of-memory error.
+ *
+ *********************************************************************/
+jb_err cgi_show_url_final_info(struct client_state *csp,
+                               struct http_response *rsp,
+                               const struct map *parameters)
+{
+   char *url_param;
+   struct map *exports;
+   jb_err err = JB_ERR_OK;
+   static const char body[] = \
+"<!DOCTYPE html><html lang=\"en\"><head><title>URL Block Info</title></head>\n"\
+"<body><table cellpadding=\"20\" cellspacing=\"10\" border=\"0\" width=\"100%\">\n"\
+"<!-- @if-url-given-start -->"\
+"<!-- @if-valid-url-start -->\n"\
+"<tr><td><h2>Final results:</h2>\n"\
+"<b>@final@</b>\n"\
+"</td></tr>\n"\
+"<!-- if-valid-url-end@ -->"\
+"<!-- if-url-given-end@ -->\n"\
+"<tr><td><h2>Look up the actions for a URL:</h2>\n"\
+"<form method=\"GET\" action=\"@default-cgi@show-url-final-info\">\n"\
+"<input type=\"text\" name=\"url\" size=\"80\" value=\"@url@\"><input type=\"submit\" value=\"Go\">\n"\
+"</form></td></tr></table></body></html>\n";
+
+   assert(csp);
+   assert(rsp);
+   assert(parameters);
+
+   if (NULL == (exports = default_exports(csp, "show-url-final-info")))
+   {
+      return JB_ERR_MEMORY;
+   }
+
+   /*
+    * Get the url= parameter (if present) and remove any leading/trailing spaces.
+    */
+   url_param = strdup_or_die(lookup(parameters, "url"));
+   chomp(url_param);
+
+   /*
+    * Handle prefixes.  4 possibilities:
+    * 1) "http://" or "https://" prefix present and followed by URL - OK
+    * 2) Only the "http://" or "https://" part is present, no URL - change
+    *    to empty string so it will be detected later as "no URL".
+    * 3) Parameter specified but doesn't start with "http(s?)://" - add a
+    *    "http://" prefix.
+    * 4) Parameter not specified or is empty string - let this fall through
+    *    for now, next block of code will handle it.
+    */
+   if (0 == strncmp(url_param, "http://", 7))
+   {
+      if (url_param[7] == '\0')
+      {
+         /*
+          * Empty URL (just prefix).
+          * Make it totally empty so it's caught by the next if ()
+          */
+         url_param[0] = '\0';
+      }
+   }
+   else if (0 == strncmp(url_param, "https://", 8))
+   {
+      if (url_param[8] == '\0')
+      {
+         /*
+          * Empty URL (just prefix).
+          * Make it totally empty so it's caught by the next if ()
+          */
+         url_param[0] = '\0';
+      }
+   }
+   else if ((url_param[0] != '\0')
+      && ((NULL == strstr(url_param, "://")
+            || (strstr(url_param, "://") > strstr(url_param, "/")))))
+   {
+      /*
+       * No prefix or at least no prefix before
+       * the first slash - assume http://
+       */
+      char *url_param_prefixed = strdup_or_die("http://");
+
+      if (JB_ERR_OK != string_join(&url_param_prefixed, url_param))
+      {
+         free_map(exports);
+         return JB_ERR_MEMORY;
+      }
+      url_param = url_param_prefixed;
+   }
+
+   if (url_param[0] == '\0')
+   {
+      /* URL parameter not specified, display query form only. */
+      free(url_param);
+      if (map_block_killer(exports, "url-given")
+        || map(exports, "url", 1, "", 1))
+      {
+         free_map(exports);
+         return JB_ERR_MEMORY;
+      }
+   }
+   else
+   {
+      /* Given a URL, so query it. */
+      char *s;
+      struct file_list *fl;
+      struct url_actions *b;
+      struct http_request url_to_query[1];
+      struct current_action_spec action[1];
+      int i;
+
+      if (map(exports, "url", 1, html_encode(url_param), 0))
+      {
+         free(url_param);
+         free_map(exports);
+         return JB_ERR_MEMORY;
+      }
+
+      init_current_action(action);
+
+      if (map(exports, "default", 1, current_action_to_html(csp, action), 0))
+      {
+         free_current_action(action);
+         free(url_param);
+         free_map(exports);
+         return JB_ERR_MEMORY;
+      }
+
+      memset(url_to_query, '\0', sizeof(url_to_query));
+      err = parse_http_url(url_param, url_to_query, REQUIRE_PROTOCOL);
+      assert((err != JB_ERR_OK) || (url_to_query->ssl == !strncmpic(url_param, "https://", 8)));
+
+      free(url_param);
+
+      if (err == JB_ERR_MEMORY)
+      {
+         free_http_request(url_to_query);
+         free_current_action(action);
+         free_map(exports);
+         return JB_ERR_MEMORY;
+      }
+      else if (err)
+      {
+         /* Invalid URL: report it through the same in-memory template. */
+         err = map(exports, "matches", 1, "<b>[Invalid URL specified!]</b>" , 1);
+         if (!err) err = map(exports, "final", 1, lookup(exports, "default"), 1);
+         if (!err) err = map_block_killer(exports, "valid-url");
+
+         free_current_action(action);
+         free_http_request(url_to_query);
+
+         if (!err)
+         {
+            rsp->body = strdup_or_die(body);
+            err = template_fill(&rsp->body, exports);
+         }
+         free_map(exports);
+
+         return err ? JB_ERR_MEMORY : JB_ERR_OK;
+      }
+
+      for (i = 0; i < MAX_AF_FILES; i++)
+      {
+         if (NULL == csp->config->actions_file_short[i]
+             || !strcmp(csp->config->actions_file_short[i], "standard.action")) continue;
+
+         b = NULL;
+         if ((fl = csp->actions_list[i]) != NULL)
+         {
+            if ((b = fl->f) != NULL)
+            {
+               b = b->next;
+            }
+         }
+
+         for ( ; b != NULL; b = b->next)
+         {
+            if (url_match(b->url, url_to_query))
+            {
+               /* Single-valued actions only; multi-actions are skipped. */
+               if (merge_single_actions(action, b->action))
+               {
+                  free_http_request(url_to_query);
+                  free_current_action(action);
+                  free_map(exports);
+                  return JB_ERR_MEMORY;
+               }
+            }
+         }
+      }
+
+      free_current_action(csp->action);
+      get_url_actions(csp, url_to_query);
+
+      free_http_request(url_to_query);
+
+      s = current_action_to_html(csp, action);
+
+      free_current_action(action);
+
+      if (map(exports, "final", 1, s, 0))
+      {
+         free_map(exports);
+         return JB_ERR_MEMORY;
+      }
+   }
+
+   /* Fill the in-memory template rather than loading a file from disk. */
+   rsp->body = strdup_or_die(body);
+   err = template_fill(&rsp->body, exports);
+   free_map(exports);
+   return err;
+}
+
+
 /*********************************************************************
  *
  * Function    :  cgi_robots_txt
diff --git a/cgisimple.h b/cgisimple.h
index ab975dc6..50a639b5 100644
--- a/cgisimple.h
+++ b/cgisimple.h
@@ -61,6 +61,9 @@ extern jb_err cgi_show_status  (struct client_state *csp,
 extern jb_err cgi_show_url_info(struct client_state *csp,
                                 struct http_response *rsp,
                                 const struct map *parameters);
+extern jb_err cgi_show_url_final_info(struct client_state *csp,
+                                      struct http_response *rsp,
+                                      const struct map *parameters);
 extern jb_err cgi_show_request (struct client_state *csp,
                                 struct http_response *rsp,
                                 const struct map *parameters);
-- 
2.39.0



More information about the Privoxy-devel mailing list