From d8c395c378a15cdbdef9245c81f1b2e96247b9ed Mon Sep 17 00:00:00 2001
From: Mitch Capper
Date: Wed, 27 Jul 2022 19:10:21 -0700
Subject: [PATCH] HTTPS/SSL Proxy support!

This enables the use of a proxy for HTTPS requests. To do so, pass the
new --httpproxy-ssl (or %C) option.

Note: this sends requests to the proxy over HTTP, not HTTPS; most
browsers use HTTPS and the CONNECT method instead. Not every proxy works
with this method, but Fiddler does.

Still needs full documentation and UI options added.
---
 html/fcguide.html |  9 +++++++++
 man/httrack.1     |  4 ++++
 src/htsalias.c    |  1 +
 src/htsback.c     |  2 +-
 src/htscoremain.c |  8 ++++++++
 src/htshelp.c     |  1 +
 src/htslib.c      | 17 ++++++++++++++---
 src/htsopt.h      |  1 +
 8 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/html/fcguide.html b/html/fcguide.html
index 9d1ece2d..1da039ba 100644
--- a/html/fcguide.html
+++ b/html/fcguide.html
@@ -204,6 +204,7 @@

Basics

Proxy options:
  P  proxy use (-P proxy:port or -P user:pass@proxy:port) (--proxy )
 %f *use proxy for ftp (f0 don't use) (--httpproxy-ftp[=N])
+ %C *use proxy for ssl (C0 don't use) (--httpproxy-ssl[=N])

Limits options:
 rN set the mirror depth to N (* r9999) (--depth[=N])
@@ -407,6 +408,7 @@

Syntax

   w *mirror web sites
  %f *use proxy for ftp (f0 don't use)
+ %C *use proxy for ssl (C0 don't use)
   cN number of multiple connections (*c8)
   RN number of retries, in case of timeout or non-fatal errors (*R1)
  %P *extended parsing, attempt to parse all links, even in unknown tags or Javascript (%P0 don't use)
@@ -442,6 +444,11 @@ 

Syntax

file transfer protocol (FTP) rather than the hypertext transfer protocol HTTP), go through an ftp proxy server to get them.
+
+ %C *use proxy for ssl (C0 don't use)
+
+If there are any links to ssl URLs (URLs using HTTPS rather than the hypertext transfer protocol
+HTTP), go through the proxy server to get them.

  cN number of multiple connections (*c8)

Use up to 8 simultaneous downloads so that at any
@@ -678,8 +685,10 @@

Proxy Options

Proxy options:
   P  proxy use (-P proxy:port or -P user:pass@proxy:port)
  %f *use proxy for ftp (f0 don't use)
+ %C *use proxy for ssl (C0 don't use)
 
+

If you are using a standard proxy that doesn't require a user ID and password, you would do something like this:
diff --git a/man/httrack.1 b/man/httrack.1
index 889dfbc5..84c94a26 100644
--- a/man/httrack.1
+++ b/man/httrack.1
@@ -22,6 +22,8 @@ httrack \- offline browser : copy websites to a local directory
] [
.B \-%f, \-\-httpproxy\-ftp[=N]
] [
+.B \-%C, \-\-httpproxy\-ssl[=N]
+] [
.B \-%b, \-\-bind
] [
.B \-rN, \-\-depth[=N]
@@ -230,6 +232,8 @@ mirror ALL links located in the first level pages (mirror links) (\-\-mirrorlink
proxy use (\-P proxy:port or \-P user:pass@proxy:port) (\-\-proxy )
.IP \-%f
*use proxy for ftp (f0 don t use) (\-\-httpproxy\-ftp[=N])
+.IP \-%C
+*use proxy for ssl (C0 don t use) (\-\-httpproxy\-ssl[=N])
.IP \-%b
use this local hostname to make/send requests (\-%b hostname) (\-\-bind )
diff --git a/src/htsalias.c b/src/htsalias.c
index f9ed7533..e0535541 100644
--- a/src/htsalias.c
+++ b/src/htsalias.c
@@ -85,6 +85,7 @@ const char *hts_optalias[][4] = {
  {"proxy", "-P", "param1", "proxy name:port"},
  {"bind", "-%b", "param1", "hostname to bind"},
  {"httpproxy-ftp", "-%f", "param", ""},
+  {"httpproxy-ssl", "-%C", "param", ""},
  {"depth", "-r", "param", ""}, {"recurse-levels", "-r", "param", ""},
  {"ext-depth", "-%e", "param", ""}, {"max-files", "-m", "param", ""},
diff --git a/src/htsback.c b/src/htsback.c
index f56ae769..c96e5ea0 100644
--- a/src/htsback.c
+++ b/src/htsback.c
@@ -1955,7 +1955,7 @@ int back_add(struct_back * sback, httrackp * opt, cache_back * cache, const char
    }
  }
#if HTS_USEOPENSSL
-  else if (strfield(back[p].url_adr, "https://")) { // let's rock
+  else if (strfield(back[p].url_adr, "https://") && ! opt->https_proxy) { // let's rock
    back[p].r.ssl = 1;
    // back[p].r.ssl_soc = NULL;
    back[p].r.ssl_con = NULL;
diff --git a/src/htscoremain.c b/src/htscoremain.c
index cf780e0d..6ed160f9 100644
--- a/src/htscoremain.c
+++ b/src/htscoremain.c
@@ -1530,6 +1530,13 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
            com++;
          }
          break;        // pas de compression
+        case 'C':
+          opt->https_proxy = 1;
+          if (*(com + 1) == '0') {
+            opt->https_proxy = 0;
+            com++;
+          }
+          break;
        case 'f':
          opt->ftp_proxy = 1;
          if (*(com + 1) == '0') {
@@ -2669,6 +2676,7 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
          na++;
          opt->proxy.active = 1;
          // Rechercher MAIS en partant de la fin à cause de user:pass@proxy:port
+          // trans: Search BUT starting from the end because of user:pass@proxy:port
          a = argv[na] + strlen(argv[na]) - 1;  // a=strstr(argv[na],":");  // port
          while((a > argv[na]) && (*a != ':') && (*a != '@'))
diff --git a/src/htshelp.c b/src/htshelp.c
index a5994d54..fe9917ca 100644
--- a/src/htshelp.c
+++ b/src/htshelp.c
@@ -508,6 +508,7 @@ void help(const char *app, int more) {
    infomsg("");
    infomsg("Proxy options:");
    infomsg(" P proxy use (-P proxy:port or -P user:pass@proxy:port)");
+    infomsg(" %C *use proxy for ssl (C0 don't use)");
    infomsg(" %f *use proxy for ftp (f0 don't use)");
    infomsg(" %b use this local hostname to make/send requests (-%b hostname)");
    infomsg("");
diff --git a/src/htslib.c b/src/htslib.c
index dbb2839f..6194fa99 100644
--- a/src/htslib.c
+++ b/src/htslib.c
@@ -681,7 +681,7 @@ T_SOC http_xfopen(httrackp * opt, int mode, int treat, int waitconnect,
  if (retour) {
    if ((!(retour->req.proxy.active))
        || ((strcmp(adr, "file://") == 0)
-            || (strncmp(adr, "https://", 8) == 0)
+            || (strncmp(adr, "https://", 8) == 0 && ! opt->https_proxy)
        )
        ) {                     /* pas de proxy, ou non utilisable ici */
      soc = newhttp(opt, adr, retour, -1, waitconnect);
@@ -951,13 +951,14 @@ int http_sendhead(httrackp * opt, t_cookie * cookie, int mode,
  }

  // si on gère un proxy, il faut une Absolute URI: on ajoute avant http://www.adr.dom
-  if (retour->req.proxy.active && (strncmp(adr, "https://", 8) != 0)) {
+  BOOL is_https = strncmp(adr, "https://", 8) == 0;
+  if (retour->req.proxy.active && (! is_https || opt->https_proxy)) {
    if (!link_has_authority(adr)) {     // default http
#if HDEBUG
      printf("Proxy Use: for %s%s proxy %d port %d\n", adr, fil,
             retour->req.proxy.name, retour->req.proxy.port);
#endif
-      print_buffer(&bstr, "http://%s", jump_identification_const(adr));
+      print_buffer(&bstr, is_https ? "https://%s" : "http://%s", jump_identification_const(adr));
    } else {                    // ftp:// en proxy http
#if HDEBUG
      printf("Proxy Use for ftp: for %s%s proxy %d port %d\n", adr, fil,
@@ -2018,6 +2019,12 @@ LLint http_xfread1(htsblk * r, int bufl) {
// en cas de moved xx, dans location
// abandonne désormais au bout de 30 secondes (aurevoir les sites
// qui nous font poireauter 5 heures..) -> -2=timeout
+//trans:
+// test if a URL (validity, header, size)
+// returns 200 or the error code (404=NOT FOUND, etc)
+// in case of moved xx, in location
+// now give up after 30 seconds (goodbye sites
+// which make us hang around for 5 hours..) -> -2=timeout
htsblk http_test(httrackp * opt, const char *adr, const char *fil, char *loc) {
  T_SOC soc;
  htsblk retour;
@@ -2123,6 +2130,9 @@ htsblk http_test(httrackp * opt, const char *adr, const char *fil, char *loc) {
// Crée un lien (http) vers une adresse internet iadr
// retour: structure (adresse, taille, message si erreur (si !adr))
// peut ouvrir avec des connect() non bloquants: waitconnect=0/1
+// trans: Create a link (http) to an iadr internet address
+// return: structure (address, size, message if error (if !adr))
+// can open with non-blocking connect(): waitconnect=0/1
T_SOC newhttp(httrackp * opt, const char *_iadr, htsblk * retour, int port,
              int waitconnect) {
  T_SOC soc;                    // descipteur de la socket
@@ -5513,6 +5523,7 @@ HTSEXT_API httrackp *hts_create_opt(void) {
  opt->urlhack = 1;             // url hack (normalizer)
  StringCopy(opt->footer, HTS_DEFAULT_FOOTER);
  opt->ftp_proxy = 1;           // proxy http pour ftp
+  opt->https_proxy = 0;        // proxy http pour https
  opt->convert_utf8 = 1;        // convert html to UTF-8
  StringCopy(opt->filelist, "");
  StringCopy(opt->lang_iso, "en, *");
diff --git a/src/htsopt.h b/src/htsopt.h
index 9f128a46..afddf0eb 100644
--- a/src/htsopt.h
+++ b/src/htsopt.h
@@ -370,6 +370,7 @@ struct httrackp {
  int maxcache;                 // maximum en mémoire au niveau du cache (backing)
  //int maxcache_anticipate;    // maximum de liens à anticiper (majorant)
  int ftp_proxy;                // proxy http pour ftp
+  int https_proxy;             // proxy http pour https
  String filelist;              // fichier liste URL à inclure
  String urllist;               // fichier liste de filtres à inclure
  htsfilters filters;           // contient les pointeurs pour les filtres
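
To make the behaviour concrete: when a proxy is configured, for example
`httrack https://example.com -P myproxy:8888 -%C` (myproxy:8888 being a
placeholder proxy address), the http_sendhead() change above makes HTTrack
send the proxy an ordinary HTTP request whose request-URI is the absolute
https:// URL, instead of opening the CONNECT tunnel most browsers use. The
following minimal sketch (standalone C, not HTTrack code; the host and path
are made-up values) shows the two request-line styles side by side:

/*
 * Minimal sketch, not part of the patch: prints the request line sent to
 * the proxy for an https URL with -%C enabled (absolute URI over plain
 * HTTP), next to the CONNECT line a browser-style client would send.
 * "www.example.com" and "/index.html" are hypothetical values.
 */
#include <stdio.h>

int main(void) {
    const char *adr = "www.example.com";   /* made-up target host */
    const char *fil = "/index.html";       /* made-up path */
    char line[512];

    /* With -%C: http_sendhead() prefixes the path with "https://" + host,
       so the proxy sees a normal GET carrying an absolute URI. */
    snprintf(line, sizeof(line), "GET https://%s%s HTTP/1.1\r\n", adr, fil);
    fputs(line, stdout);

    /* What a CONNECT-capable client would send to tunnel TLS end to end: */
    snprintf(line, sizeof(line), "CONNECT %s:443 HTTP/1.1\r\n", adr);
    fputs(line, stdout);
    return 0;
}

Because the exchange with the proxy itself stays plain HTTP, only proxies
that accept absolute https:// request-URIs (Fiddler is one, per the commit
message) can be expected to work; proxies that only support CONNECT-style
tunnelling are not.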