[Privoxy-devel] 0007-Create-a-fast-CGI-function
Lee
ler762 at protonmail.com
Thu Aug 3 17:45:24 CEST 2023
Empty Message
-------------- next part --------------
From b745be95e2df609729b1b9cb08c1bd8f222e03d7 Mon Sep 17 00:00:00 2001
From: Lee <ler762 at users.sourceforge.net>
Date: Tue, 1 Aug 2023 02:29:16 -0400
Subject: [PATCH 7/9] Create a fast CGI function to determine if a URL will be
blocked.
Calling cgi_show_url_info is slow.
If it's a person doing the call from a browser, a CGI processing time of tens
of milliseconds isn't going to make much difference.
But if it's a program processing an "is this URL blocked or not" file of over
100K lines, an extra 2ms per call adds up to over 200 seconds.
So create a new cgi_show_url_final_info function that
- keeps a short in-memory template instead of reading it in from the disk
- returns just the "final info" instead of the intermediate results and then
the final info.
- skips processing of multi-actions (things that can be applied multiple
times like filter{}). Hence the new merge_single_actions function.
---
actions.c | 48 +++++++++++
actions.h | 2 +
cgi.c | 4 +
cgisimple.c | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++++
cgisimple.h | 3 +
5 files changed, 293 insertions(+)
diff --git a/actions.c b/actions.c
index 379c5e97..c181ca72 100644
--- a/actions.c
+++ b/actions.c
@@ -784,6 +784,54 @@ jb_err merge_current_action (struct current_action_spec *dest,
}
+/*********************************************************************
+ *
+ * Function    :  merge_single_actions
+ *
+ * Description :  Merge the single-valued parts of an action into the
+ *                current actions.  Similar to "dest += src".
+ *                Same thing as merge_current_action() except that
+ *                multi actions (things that can be applied multiple
+ *                times, like filter{}) are not processed.
+ *
+ *                The flags of "dest" are masked with src->mask and
+ *                then src->add is or'ed in.  Every string set in
+ *                "src" is duplicated with strdup_or_die() and takes
+ *                the place of the one in "dest", so "dest" holds its
+ *                own copies; free it using "free_current_action".
+ *                The multi-value lists of "dest" are not touched.
+ *
+ * Parameters  :
+ *          1  :  dest = Current actions, to modify.
+ *          2  :  src = Action to add.
+ *
+ * Returns     :  JB_ERR_OK, always.  The jb_err return type is kept
+ *                so it can be called like merge_current_action().
+ *
+ *********************************************************************/
+jb_err merge_single_actions (struct current_action_spec *dest,
+                             const struct action_spec *src)
+{
+   int i;
+
+   dest->flags &= src->mask;
+   dest->flags |= src->add;
+
+   for (i = 0; i < ACTION_STRING_COUNT; i++)
+   {
+      const char *str = src->string[i];
+      if (str != NULL)
+      {
+         /* Duplicate first, then release the value dest held. */
+         char *copy = strdup_or_die(str);
+         freez(dest->string[i]);
+         dest->string[i] = copy;
+      }
+   }
+   return JB_ERR_OK;
+}
+
+
/*********************************************************************
*
* Function : update_action_bits_for_tag
diff --git a/actions.h b/actions.h
index 618bc255..a797b98a 100644
--- a/actions.h
+++ b/actions.h
@@ -70,6 +70,8 @@ extern void init_current_action (struct current_action_spec *dest);
extern void free_current_action (struct current_action_spec *src);
extern jb_err merge_current_action (struct current_action_spec *dest,
const struct action_spec *src);
+extern jb_err merge_single_actions (struct current_action_spec *dest,
+ const struct action_spec *src);
extern char * current_action_to_html(const struct client_state *csp,
const struct current_action_spec *action);
extern char * actions_to_line_of_text(const struct current_action_spec *action);
diff --git a/cgi.c b/cgi.c
index d60166f2..c067f696 100644
--- a/cgi.c
+++ b/cgi.c
@@ -117,6 +117,10 @@ static const struct cgi_dispatcher cgi_dispatchers[] = {
cgi_show_url_info,
"Look up which actions apply to a URL and why",
TRUE },
+ { "show-url-final-info",
+ cgi_show_url_final_info,
+ "Look up the final actions that apply to a URL",
+ TRUE },
#ifdef FEATURE_TOGGLE
{ "toggle",
cgi_toggle,
diff --git a/cgisimple.c b/cgisimple.c
index 961510f2..99a352cb 100644
--- a/cgisimple.c
+++ b/cgisimple.c
@@ -1924,6 +1924,242 @@ jb_err cgi_show_url_info(struct client_state *csp,
}
+/*********************************************************************
+ *
+ * Function    :  cgi_show_url_final_info
+ *
+ * Description :  CGI function that shows just the "Final results:"
+ *                section from cgi_show_url_info.
+ *                If all you want to know is if a URL would be blocked
+ *                or not, this is the function for you!
+ *                The page is produced from a short template that is
+ *                compiled into this function, and only the
+ *                single-valued actions are merged
+ *                (see merge_single_actions).
+ *
+ * Parameters  :
+ *          1  :  csp = Current client state (buffers, headers, etc...)
+ *          2  :  rsp = http_response data structure for output
+ *          3  :  parameters = map of cgi parameters
+ *
+ * CGI Parameters :
+ *        url  :  The url whose actions are to be determined.
+ *                If url is unset or empty, the url-given block is
+ *                removed from the template so only the form is shown.
+ *                If url cannot be parsed, the valid-url block is
+ *                removed instead.
+ *
+ * Returns     :  JB_ERR_OK on success
+ *                JB_ERR_MEMORY on out-of-memory error.
+ *
+ *********************************************************************/
+jb_err cgi_show_url_final_info(struct client_state *csp,
+                               struct http_response *rsp,
+                               const struct map *parameters)
+{
+   char *url_param;
+   struct map *exports;
+   jb_err err;
+   /*
+    * The page template. Kept "static const" so the text is not
+    * rebuilt on the stack for every request.
+    */
+   static const char body[] =
+"<!DOCTYPE html><html lang=\"en\"><head><title>URL Block Info</title></head>\n"
+"<body><table cellpadding=\"20\" cellspacing=\"10\" border=\"0\" width=\"100%\">\n"
+"<!-- @if-url-given-start -->"
+"<!-- @if-valid-url-start -->\n"
+"<tr><td><h2>Final results:</h2>\n"
+"<b>@final@</b>\n"
+"</td></tr>\n"
+"<!-- if-valid-url-end@ -->"
+"<!-- if-url-given-end@ -->\n"
+"<tr><td><h2>Look up the actions for a URL:</h2>\n"
+"<form method=\"GET\" action=\"@default-cgi@show-url-final-info\">\n"
+"<input type=\"text\" name=\"url\" size=\"80\" value=\"@url@\"><input type=\"submit\" value=\"Go\">\n"
+"</form></td></tr></table></body></html>\n";
+
+   assert(csp);
+   assert(rsp);
+   assert(parameters);
+
+   if (NULL == (exports = default_exports(csp, "show-url-final-info")))
+   {
+      return JB_ERR_MEMORY;
+   }
+
+   /*
+    * Get the url= parameter (if present) and remove any leading/trailing spaces.
+    */
+   url_param = strdup_or_die(lookup(parameters, "url"));
+   chomp(url_param);
+
+   /*
+    * Handle prefixes.  4 possibilities:
+    * 1) "http://" or "https://" prefix present and followed by URL - OK
+    * 2) Only the "http://" or "https://" part is present, no URL - change
+    *    to empty string so it will be detected later as "no URL".
+    * 3) Parameter specified but doesn't start with "http(s?)://" - add a
+    *    "http://" prefix.
+    * 4) Parameter not specified or is empty string - let this fall through
+    *    for now, next block of code will handle it.
+    */
+   if (0 == strncmp(url_param, "http://", 7))
+   {
+      if (url_param[7] == '\0')
+      {
+         /*
+          * Empty URL (just prefix).
+          * Make it totally empty so it's caught by the next if ()
+          */
+         url_param[0] = '\0';
+      }
+   }
+   else if (0 == strncmp(url_param, "https://", 8))
+   {
+      if (url_param[8] == '\0')
+      {
+         /*
+          * Empty URL (just prefix).
+          * Make it totally empty so it's caught by the next if ()
+          */
+         url_param[0] = '\0';
+      }
+   }
+   else if ((url_param[0] != '\0')
+      && ((NULL == strstr(url_param, "://")
+            || (strstr(url_param, "://") > strstr(url_param, "/")))))
+   {
+      /*
+       * No prefix or at least no prefix before
+       * the first slash - assume http://
+       */
+      char *url_param_prefixed = strdup_or_die("http://");
+
+      if (JB_ERR_OK != string_join(&url_param_prefixed, url_param))
+      {
+         free_map(exports);
+         return JB_ERR_MEMORY;
+      }
+      url_param = url_param_prefixed;
+   }
+
+   if (url_param[0] == '\0')
+   {
+      /* URL parameter not specified, display query form only. */
+      free(url_param);
+      if (map_block_killer(exports, "url-given")
+        || map(exports, "url", 1, "", 1))
+      {
+         free_map(exports);
+         return JB_ERR_MEMORY;
+      }
+   }
+   else
+   {
+      /* Given a URL, so query it. */
+      struct http_request url_to_query[1];
+      struct current_action_spec action[1];
+
+      if (map(exports, "url", 1, html_encode(url_param), 0))
+      {
+         free(url_param);
+         free_map(exports);
+         return JB_ERR_MEMORY;
+      }
+
+      init_current_action(action);
+
+      /* Only looked up again below, when the URL turns out to be invalid. */
+      if (map(exports, "default", 1, current_action_to_html(csp, action), 0))
+      {
+         free_current_action(action);
+         free(url_param);
+         free_map(exports);
+         return JB_ERR_MEMORY;
+      }
+
+      memset(url_to_query, '\0', sizeof(url_to_query));
+      err = parse_http_url(url_param, url_to_query, REQUIRE_PROTOCOL);
+      assert((err != JB_ERR_OK) || (url_to_query->ssl == !strncmpic(url_param, "https://", 8)));
+
+      free(url_param);
+
+      if (err == JB_ERR_MEMORY)
+      {
+         free_http_request(url_to_query);
+         free_current_action(action);
+         free_map(exports);
+         return JB_ERR_MEMORY;
+      }
+      else if (err)
+      {
+         /*
+          * Invalid URL.  Drop the valid-url block and fall through to
+          * the in-memory template below; this patch adds no on-disk
+          * "show-url-final-info" template that could be loaded instead.
+          */
+         err = map(exports, "matches", 1, "<b>[Invalid URL specified!]</b>" , 1);
+         if (!err) err = map(exports, "final", 1, lookup(exports, "default"), 1);
+         if (!err) err = map_block_killer(exports, "valid-url");
+
+         free_current_action(action);
+         free_http_request(url_to_query);
+
+         if (err)
+         {
+            free_map(exports);
+            return JB_ERR_MEMORY;
+         }
+      }
+      else
+      {
+         char *s;
+         int i;
+
+         for (i = 0; i < MAX_AF_FILES; i++)
+         {
+            struct file_list *fl;
+            struct url_actions *b = NULL;
+
+            if (NULL == csp->config->actions_file_short[i]
+                || !strcmp(csp->config->actions_file_short[i], "standard.action"))
+            {
+               continue;
+            }
+
+            if ((fl = csp->actions_list[i]) != NULL)
+            {
+               if ((b = fl->f) != NULL)
+               {
+                  /* The first list entry is stepped over, not matched. */
+                  b = b->next;
+               }
+            }
+
+            for ( ; b != NULL; b = b->next)
+            {
+               if (url_match(b->url, url_to_query))
+               {
+                  /* Single-valued actions only; multi actions are skipped. */
+                  if (merge_single_actions(action, b->action))
+                  {
+                     free_http_request(url_to_query);
+                     free_current_action(action);
+                     free_map(exports);
+                     return JB_ERR_MEMORY;
+                  }
+               }
+            }
+         }
+
+         /*
+          * NOTE(review): this recomputes csp->action for the queried URL,
+          * but nothing below reads it - confirm it is still needed here.
+          */
+         free_current_action(csp->action);
+         get_url_actions(csp, url_to_query);
+
+         free_http_request(url_to_query);
+
+         s = current_action_to_html(csp, action);
+
+         free_current_action(action);
+
+         if (map(exports, "final", 1, s, 0))
+         {
+            free_map(exports);
+            return JB_ERR_MEMORY;
+         }
+      }
+   }
+
+   /* Fill the in-memory template and report any failure to the caller. */
+   rsp->body = strdup_or_die(body);
+   err = template_fill(&rsp->body, exports);
+   free_map(exports);
+   return err;
+}
+
+
/*********************************************************************
*
* Function : cgi_robots_txt
diff --git a/cgisimple.h b/cgisimple.h
index ab975dc6..50a639b5 100644
--- a/cgisimple.h
+++ b/cgisimple.h
@@ -61,6 +61,9 @@ extern jb_err cgi_show_status (struct client_state *csp,
extern jb_err cgi_show_url_info(struct client_state *csp,
struct http_response *rsp,
const struct map *parameters);
+extern jb_err cgi_show_url_final_info(struct client_state *csp,
+ struct http_response *rsp,
+ const struct map *parameters);
extern jb_err cgi_show_request (struct client_state *csp,
struct http_response *rsp,
const struct map *parameters);
--
2.39.0
More information about the Privoxy-devel
mailing list