1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
|
/* Support for cookies.
Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007 Free
Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at
your option) any later version.
GNU Wget is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget. If not, see <http://www.gnu.org/licenses/>.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
/* Written by Hrvoje Niksic. Parts are loosely inspired by the
cookie patch submitted by Tomasz Wegrzanowski.
This implements the client-side cookie support, as specified
(loosely) by Netscape's "preliminary specification", currently
available at:
http://wp.netscape.com/newsref/std/cookie_spec.html
rfc2109 is not supported because of its incompatibilities with the
above widely-used specification. rfc2965 is entirely ignored,
since popular client software doesn't implement it, and even the
sites that do send Set-Cookie2 also emit Set-Cookie for
compatibility. */
#include <config.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <errno.h>
#include <time.h>
#include "wget.h"
#include "utils.h"
#include "hash.h"
#include "cookies.h"
#include "http.h" /* for http_atotm */
/* Declarations of `struct cookie' and the most basic functions. */
/* Cookie jar serves as cookie storage and a means of retrieving
   cookies efficiently.  All cookies with the same domain are stored
   in a linked list called "chain".  A cookie chain can be reached by
   looking up the domain in the cookie jar's `chains' table.

   For example, to reach all the cookies under google.com, one must
   execute hash_table_get (jar->chains, "google.com").  Of course,
   when sending a cookie to `www.google.com', one must search for
   cookies that belong to either `www.google.com' or `google.com' --
   but the point is that the code doesn't need to go through *all*
   the cookies.  */
struct cookie_jar {
  /* Cookie chains indexed by domain.  The table is created by
     make_nocase_string_hash_table in cookie_jar_new; each value is
     the head of a linked list of `struct cookie'.  */
  struct hash_table *chains;
  int cookie_count; /* number of cookies in the jar. */
};
/* Current time as seen by the cookie code.  It is assigned from
   time (NULL) by the entry point functions (cookie_handle_set_cookie
   and cookie_header do so), so that the low-level routines such as
   cookie_expired_p don't need to call time() all the time.  */
static time_t cookies_now;
/* Allocate and return an empty cookie jar: no cookies are counted and
   the per-domain chain table starts out empty.  */
struct cookie_jar *
cookie_jar_new (void)
{
  struct cookie_jar *fresh = xnew (struct cookie_jar);

  fresh->cookie_count = 0;
  fresh->chains = make_nocase_string_hash_table (0);

  return fresh;
}
/* A single cookie, as stored in a jar.  Cookies of the same domain
   are linked together through NEXT.  */
struct cookie {
  char *domain; /* domain of the cookie */
  int port; /* port number, or PORT_ANY if the cookie
               is not tied to a particular port */
  char *path; /* path prefix of the cookie */
  unsigned discard_requested :1; /* whether cookie was created to
                                    request discarding another
                                    cookie. */
  unsigned secure :1; /* whether cookie must only be
                         transmitted over secure (https)
                         connections. */
  unsigned domain_exact :1; /* whether DOMAIN must match as a
                               whole. */
  unsigned permanent :1; /* whether the cookie should outlive
                            the session. */
  time_t expiry_time; /* time when the cookie expires, 0
                         means undetermined. */
  char *attr; /* cookie attribute name */
  char *value; /* cookie attribute value */
  struct cookie *next; /* used for chaining of cookies in the
                          same domain. */
};
/* Value of `port' in struct cookie meaning that the cookie is not
   restricted to one port; cookie_new installs it as the default.  */
#define PORT_ANY (-1)
/* Create a blank cookie.  All members start out zeroed except for
   the port, which is set to PORT_ANY.  */

static struct cookie *
cookie_new (void)
{
  /* Zeroed storage leaves `permanent' and `expiry_time' at 0, which
     describes a cookie that never expires but lives only for the
     current session (it is not written out to disk).  */
  struct cookie *fresh = xnew0 (struct cookie);

  fresh->port = PORT_ANY;
  return fresh;
}
/* Return true if cookie C carries an expiry time that lies before
   cookies_now.  The caller (one of the entry point functions) must
   have set cookies_now beforehand.  */

static bool
cookie_expired_p (const struct cookie *c)
{
  /* An expiry time of 0 means "undetermined": such a cookie never
     counts as expired.  */
  if (c->expiry_time == 0)
    return false;

  return c->expiry_time < cookies_now;
}
/* Release COOKIE together with every string it owns.  Members that
   were never set (NULL) are handled by xfree_null.  */

static void
delete_cookie (struct cookie *cookie)
{
  /* The name/value pair first, then the matching criteria.  */
  xfree_null (cookie->attr);
  xfree_null (cookie->value);
  xfree_null (cookie->domain);
  xfree_null (cookie->path);

  xfree (cookie);
}
/* Functions for storing cookies.
All cookies can be reached beginning with jar->chains. The key in
that table is the domain name, and the value is a linked list of
all cookies from that domain. Every new cookie is placed on the
head of the list. */
/* Look in JAR for a stored cookie that has the same domain, path,
   attribute name, and port as COOKIE.

   On success the stored cookie is returned and *PREVPTR is set to
   the cookie that precedes it in its chain, or to NULL when the
   match is the head of the chain.

   When nothing matches, NULL is returned and *PREVPTR is set to
   NULL.  */

static struct cookie *
find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
                      struct cookie **prevptr)
{
  struct cookie *candidate = hash_table_get (jar->chains, cookie->domain);
  struct cookie *before = NULL;

  /* Walk the domain's chain (if there is one), remembering the
     element we came from.  */
  while (candidate)
    {
      if (0 == strcmp (cookie->path, candidate->path)
          && 0 == strcmp (cookie->attr, candidate->attr)
          && cookie->port == candidate->port)
        {
          *prevptr = before;
          return candidate;
        }
      before = candidate;
      candidate = candidate->next;
    }

  *prevptr = NULL;
  return NULL;
}
/* Add COOKIE to JAR.

   COOKIE becomes the new head of the chain for its domain.  If the
   jar already holds a cookie that find_matching_cookie considers
   equal to COOKIE, that older cookie is unlinked and destroyed
   first.

   The hash table key of a chain is allocated once, when the chain is
   created; later calls to hash_table_put reuse that same key.  */

static void
store_cookie (struct cookie_jar *jar, struct cookie *cookie)
{
  struct cookie *chain_head;
  char *chain_key;

  if (!hash_table_get_pair (jar->chains, cookie->domain,
                            &chain_key, &chain_head))
    {
      /* No chain for this domain yet.  The key must be a private
         copy of the domain: the chain may outlive this particular
         cookie (this very function deletes cookies from chains), so
         cookie->domain itself cannot serve as the key.  */
      chain_key = xstrdup (cookie->domain);
      cookie->next = NULL;
    }
  else
    {
      /* The domain already has a chain.  A stored cookie with the
         same domain, port, path, and name gets replaced.  */
      struct cookie *prev;
      struct cookie *victim = find_matching_cookie (jar, cookie, &prev);

      if (!victim)
        cookie->next = chain_head;
      else
        {
          if (prev)
            {
              /* VICTIM sits somewhere inside the chain: bypass it and
                 put COOKIE in front of the old head.  */
              prev->next = victim->next;
              cookie->next = chain_head;
            }
          else
            /* VICTIM is the head itself; COOKIE simply takes over its
               successor.  */
            cookie->next = victim->next;

          delete_cookie (victim);
          --jar->cookie_count;
          DEBUGP (("Deleted old cookie (to be replaced.)\n"));
        }
    }

  /* COOKIE is the chain head from now on.  */
  hash_table_put (jar->chains, chain_key, cookie);
  ++jar->cookie_count;

  IF_DEBUG
    {
      time_t exptime = cookie->expiry_time;
      DEBUGP (("\nStored cookie %s %d%s %s <%s> <%s> [expiry %s] %s %s\n",
               cookie->domain, cookie->port,
               cookie->port == PORT_ANY ? " (ANY)" : "",
               cookie->path,
               cookie->permanent ? "permanent" : "session",
               cookie->secure ? "secure" : "insecure",
               cookie->expiry_time ? datetime_str (exptime) : "none",
               cookie->attr, cookie->value));
    }
}
/* Discard the cookie stored in JAR that matches COOKIE's domain,
   port, path, and attribute name, if there is one.  COOKIE itself is
   only used as the search pattern; it is neither stored nor freed.

   This gets called when we encounter a cookie whose expiry date is
   in the past, or whose max-age is set to 0.  The former corresponds
   to netscape cookie spec, while the latter is specified by
   rfc2109.

   The jar's cookie_count is decremented for the removed cookie, so
   that it keeps agreeing with the bookkeeping done by
   store_cookie.  */

static void
discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
{
  struct cookie *prev, *victim;

  if (!hash_table_count (jar->chains))
    /* No elements == nothing to discard.  */
    return;

  victim = find_matching_cookie (jar, cookie, &prev);
  if (!victim)
    return;

  if (prev)
    /* Simply unchain the victim.  */
    prev->next = victim->next;
  else
    {
      /* VICTIM was head of its chain.  We need to place a new
         cookie at the head.  */
      char *chain_key = NULL;
      int res;

      /* Fetch the key the table owns for this chain; it is needed
         either to free it or to re-register the new head.  */
      res = hash_table_get_pair (jar->chains, victim->domain,
                                 &chain_key, NULL);
      assert (res != 0);
      if (!victim->next)
        {
          /* VICTIM was the only cookie in the chain.  Destroy the
             chain and deallocate the chain key.  */
          hash_table_remove (jar->chains, victim->domain);
          xfree (chain_key);
        }
      else
        hash_table_put (jar->chains, chain_key, victim->next);
    }

  delete_cookie (victim);
  /* One stored cookie is gone; keep the count in step (store_cookie
     does the same when it replaces a cookie).  */
  --jar->cookie_count;
  DEBUGP (("Discarded old cookie.\n"));
}
/* Functions for parsing the `Set-Cookie' header, and creating new
cookies from the wire. */
/* True if TOKEN (one of the param_token variables below, delimited by
   its .b and .e members) equals STRING_LITERAL according to
   BOUNDED_EQUAL_NO_CASE.  */
#define TOKEN_IS(token, string_literal) \
  BOUNDED_EQUAL_NO_CASE (token.b, token.e, string_literal)
/* True if TOKEN has a non-NULL beginning and its beginning differs
   from its end.  */
#define TOKEN_NON_EMPTY(token) (token.b != NULL && token.b != token.e)
/* Parse the contents of the `Set-Cookie' header.  The header looks
   like this:

   name1=value1; name2=value2; ...

   Trailing semicolon is optional; spaces are allowed between all
   tokens.  Additionally, values may be quoted.

   A new cookie is returned upon success, NULL otherwise.  On failure
   a "Syntax error in Set-Cookie" message is logged unless SILENT is
   true, and the partially built cookie is freed.

   The first name-value pair will be used to set the cookie's
   attribute name and value.  Subsequent parameters will be checked
   against field names such as `domain', `path', etc.  Recognized
   fields will be parsed and the corresponding members of COOKIE
   filled.

   The `expires' and `max-age' handling compares against and adds to
   cookies_now, so the caller must have set it.  */
static struct cookie *
parse_set_cookie (const char *set_cookie, bool silent)
{
  /* Parse position; also used to report where a syntax error was
     detected.  (Presumably advanced by extract_param -- confirm in
     its definition.)  */
  const char *ptr = set_cookie;
  struct cookie *cookie = cookie_new ();
  param_token name, value;
  /* The first parameter is the cookie itself and must have a value.  */
  if (!extract_param (&ptr, &name, &value, ';'))
    goto error;
  if (!value.b)
    goto error;
  cookie->attr = strdupdelim (name.b, name.e);
  cookie->value = strdupdelim (value.b, value.e);
  /* The remaining parameters are the cookie's attributes.  */
  while (extract_param (&ptr, &name, &value, ';'))
    {
      if (TOKEN_IS (name, "domain"))
        {
          if (!TOKEN_NON_EMPTY (value))
            goto error;
          /* A repeated attribute replaces the earlier value.  */
          xfree_null (cookie->domain);
          /* Strictly speaking, we should set cookie->domain_exact if the
             domain doesn't begin with a dot.  But many sites set the
             domain to "foo.com" and expect "subhost.foo.com" to get the
             cookie, and it apparently works in browsers.  */
          if (*value.b == '.')
            ++value.b;
          cookie->domain = strdupdelim (value.b, value.e);
        }
      else if (TOKEN_IS (name, "path"))
        {
          if (!TOKEN_NON_EMPTY (value))
            goto error;
          xfree_null (cookie->path);
          cookie->path = strdupdelim (value.b, value.e);
        }
      else if (TOKEN_IS (name, "expires"))
        {
          char *value_copy;
          time_t expires;
          if (!TOKEN_NON_EMPTY (value))
            goto error;
          /* http_atotm takes a plain string, so make a copy of the
             token first (judging by the macro's name, the copy is
             NUL-terminated and alloca'd -- confirm).  */
          BOUNDED_TO_ALLOCA (value.b, value.e, value_copy);
          expires = http_atotm (value_copy);
          if (expires != (time_t) -1)
            {
              cookie->permanent = 1;
              cookie->expiry_time = expires;
              /* According to netscape's specification, expiry time in
                 the past means that discarding of a matching cookie
                 is requested.  */
              if (cookie->expiry_time < cookies_now)
                cookie->discard_requested = 1;
            }
          else
            /* Error in expiration spec.  Assume default (cookie doesn't
               expire, but valid only for this session.)  */
            ;
        }
      else if (TOKEN_IS (name, "max-age"))
        {
          /* -1 doubles as the "sscanf stored nothing" marker.  */
          double maxage = -1;
          char *value_copy;
          if (!TOKEN_NON_EMPTY (value))
            goto error;
          BOUNDED_TO_ALLOCA (value.b, value.e, value_copy);
          sscanf (value_copy, "%lf", &maxage);
          /* NOTE(review): a literal max-age of -1 is indistinguishable
             from a failed conversion here and is rejected as a syntax
             error; other negative values are accepted.  */
          if (maxage == -1)
            /* something went wrong. */
            goto error;
          cookie->permanent = 1;
          /* NOTE(review): the double sum is converted to time_t without
             a range check -- confirm whether huge values need
             clamping.  */
          cookie->expiry_time = cookies_now + maxage;
          /* According to rfc2109, a cookie with max-age of 0 means that
             discarding of a matching cookie is requested.  */
          if (maxage == 0)
            cookie->discard_requested = 1;
        }
      else if (TOKEN_IS (name, "secure"))
        {
          /* ignore value completely */
          cookie->secure = 1;
        }
      else
        /* Ignore unrecognized attribute.  */
        ;
    }
  /* extract_param stopped; anything left over at PTR means it stopped
     because of an error rather than at the end of the header.  */
  if (*ptr)
    /* extract_param has encountered a syntax error */
    goto error;
  /* The cookie has been successfully constructed; return it.  */
  return cookie;
 error:
  if (!silent)
    logprintf (LOG_NOTQUIET,
               _("Syntax error in Set-Cookie: %s at position %d.\n"),
               escnonprint (set_cookie), (int) (ptr - set_cookie));
  delete_cookie (cookie);
  return NULL;
}
#undef TOKEN_IS
#undef TOKEN_NON_EMPTY
/* Sanity checks.  These are important, otherwise it is possible for
   malicious attackers to destroy important cookie information and/or
   violate your privacy.  */
/* Helpers for scanners that return bool.  Each one inspects the text
   at P, advances P past what it accepted, and makes the *enclosing
   function* return false when the expected text is missing.  P must
   be a modifiable pointer variable; it is evaluated several times.  */

/* Consume one or more digits.  */
#define REQUIRE_DIGITS(p) do {                  \
    if (!ISDIGIT (*p))                          \
      return false;                             \
    do                                          \
      ++p;                                      \
    while (ISDIGIT (*p));                       \
  } while (0)

/* Consume exactly one '.'.  */
#define REQUIRE_DOT(p) do {                     \
    if (*p++ != '.')                            \
      return false;                             \
  } while (0)

/* Return true if ADDR has the shape
   <digits>.<digits>.<digits>.<digits> and nothing else.  The
   components are not range-checked.

   Network functions like inet_addr() are deliberately not used: all
   that is needed is a check, preferably one that is small, fast, and
   well-defined.  */

static bool
numeric_address_p (const char *addr)
{
  const char *cursor = addr;
  int component;

  for (component = 0; component < 4; component++)
    {
      /* Components after the first are introduced by a dot.  */
      if (component > 0)
        REQUIRE_DOT (cursor);
      REQUIRE_DIGITS (cursor);
    }

  /* Trailing text after the fourth component disqualifies ADDR.  */
  return *cursor == '\0';
}
/* Check whether COOKIE_DOMAIN is an appropriate domain for HOST, i.e.
   whether a server named HOST may set a cookie for COOKIE_DOMAIN.
   Returns true if it may, false otherwise.

   Originally I tried to make the check compliant with rfc2109, but
   the sites deviated too often, so I had to fall back to "tail
   matching", as defined by the original Netscape's cookie spec.

   The DEBUGP calls print a running step number ("cdm: 1 2 3 ...") so
   that a debug log shows how far the check got.  */
static bool
check_domain_match (const char *cookie_domain, const char *host)
{
  DEBUGP (("cdm: 1"));
  /* Numeric address requires exact match.  It also requires HOST to
     be an IP address.  */
  if (numeric_address_p (cookie_domain))
    return 0 == strcmp (cookie_domain, host);
  DEBUGP ((" 2"));
  /* For the sake of efficiency, check for exact match first.  */
  if (0 == strcasecmp (cookie_domain, host))
    return true;
  DEBUGP ((" 3"));
  /* COOKIE_DOMAIN must match the tail of HOST.  (match_tail is called
     as (string, tail, fold_case) here and below.)  */
  if (!match_tail (host, cookie_domain, true))
    return false;
  /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must
     make sure that somebody is not trying to set the cookie for a
     subdomain shared by many entities.  For example, "company.co.uk"
     must not be allowed to set a cookie for ".co.uk".  On the other
     hand, "sso.redhat.de" should be able to set a cookie for
     ".redhat.de".

     The only marginally sane way to handle this I can think of is to
     reject on the basis of the length of the second-level domain name
     (but only when the top-level domain is unknown), with the
     assumption that those of three or less characters could be
     reserved.  For example:

     .co.org     -> works because the TLD is known
     .co.uk      -> doesn't work because "co" is only two chars long
     .com.au     -> doesn't work because "com" is only 3 chars long
     .cnn.uk     -> doesn't work because "cnn" is also only 3 chars long (ugh)
     .cnn.de     -> doesn't work for the same reason (ugh!!)
     .abcd.de    -> works because "abcd" is 4 chars long
     .img.cnn.de -> works because it's not trying to set the 2nd level domain
     .cnn.co.uk  -> works for the same reason

     That should prevent misuse, while allowing reasonable usage.  If
     someone knows of a better way to handle this, please let me
     know.  */
  {
    const char *p = cookie_domain;
    int dccount = 1; /* number of domain components */
    int ldcl = 0; /* last domain component length */
    int nldcl = 0; /* next to last domain component length */
    int out;
    if (*p == '.')
      /* Ignore leading period in this calculation. */
      ++p;
    DEBUGP ((" 4"));
    /* Scan the domain once, counting its components and tracking the
       lengths of the two most recent ones.  */
    for (out = 0; !out; p++)
      switch (*p)
        {
        case '\0':
          out = 1;
          break;
        case '.':
          if (ldcl == 0)
            /* Empty domain component found -- the domain is invalid. */
            return false;
          if (*(p + 1) == '\0')
            {
              /* Tolerate trailing '.' by not treating the domain as
                 one ending with an empty domain component. */
              out = 1;
              break;
            }
          /* A new component begins: the one just finished becomes
             the "next to last" one.  */
          nldcl = ldcl;
          ldcl = 0;
          ++dccount;
          break;
        default:
          ++ldcl;
        }
    DEBUGP ((" 5"));
    /* A single-component domain (e.g. a bare TLD) is never
       acceptable.  */
    if (dccount < 2)
      return false;
    DEBUGP ((" 6"));
    if (dccount == 2)
      {
        int i;
        int known_toplevel = false;
        static const char *known_toplevel_domains[] = {
          ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
        };
        for (i = 0; i < countof (known_toplevel_domains); i++)
          if (match_tail (cookie_domain, known_toplevel_domains[i], true))
            {
              known_toplevel = true;
              break;
            }
        /* Under an unknown TLD, a second-level name of three or fewer
           characters is treated as possibly reserved (see above).  */
        if (!known_toplevel && nldcl <= 3)
          return false;
      }
  }
  DEBUGP ((" 7"));
  /* Don't allow the host "foobar.com" to set a cookie for domain
     "bar.com". */
  if (*cookie_domain != '.')
    {
      int dlen = strlen (cookie_domain);
      int hlen = strlen (host);
      /* Example: for host "hostname.foobar.com" and desired domain
         "bar.com", the character of HOST right before the matched
         tail is the second 'o' of "foobar".  It would have to be a
         '.' for the tail to start at a domain component boundary.  */
      if (hlen > dlen && host[hlen - dlen - 1] != '.')
        return false;
    }
  DEBUGP ((" 8"));
  return true;
}
static int path_matches (const char *, const char *);

/* Return true if PATH begins with COOKIE_PATH, as judged by
   path_matches; false otherwise.  */

static bool
check_path_match (const char *cookie_path, const char *path)
{
  /* path_matches returns 0 exactly when there is no match.  */
  if (path_matches (path, cookie_path) == 0)
    return false;

  return true;
}
/* Make S point to a copy of itself that has '/' prepended.  The copy
   is placed in stack space obtained from alloca, so it is only valid
   until the calling function returns.  S is evaluated more than once
   and is assigned to, so it must be a plain pointer variable.  */

#define PREPEND_SLASH(s) do {                                   \
  size_t PS_len = strlen (s);                                   \
  /* Room for the slash, the string, and the terminator.  */    \
  char *PS_copy = (char *) alloca (PS_len + 2);                 \
  PS_copy[0] = '/';                                             \
  memcpy (PS_copy + 1, s, PS_len + 1);                          \
  s = PS_copy;                                                  \
} while (0)
/* Handle one HTTP `Set-Cookie' header, SET_COOKIE, received from
   HOST:PORT in response to a request for PATH.

   Depending on the header's contents, the resulting cookie is stored
   in JAR, used to discard a matching stored cookie, or dropped
   altogether.  */

void
cookie_handle_set_cookie (struct cookie_jar *jar,
                          const char *host, int port,
                          const char *path, const char *set_cookie)
{
  struct cookie *cookie;

  cookies_now = time (NULL);

  /* Wget's paths don't begin with '/' (blame rfc1808), but cookie
     usage assumes /-prefixed paths.  Until the rest of Wget is fixed,
     simply prepend slash to PATH.  */
  PREPEND_SLASH (path);

  cookie = parse_set_cookie (set_cookie, false);
  if (!cookie)
    return;

  /* Domain: a domain the host is not entitled to is reported and
     then treated as if no domain had been given at all.  */
  if (cookie->domain && !check_domain_match (cookie->domain, host))
    {
      logprintf (LOG_NOTQUIET,
                 _("Cookie coming from %s attempted to set domain to %s\n"),
                 escnonprint (host), escnonprint (cookie->domain));
      xfree (cookie->domain);
      cookie->domain = NULL;
    }
  if (!cookie->domain)
    {
      /* Without a (usable) domain the cookie belongs to the host we
         are talking to, and only to that host.  */
      cookie->domain = xstrdup (host);
      cookie->domain_exact = 1;
      /* Remember the port, but only if it's non-default.  */
      if (port != 80 && port != 443)
        cookie->port = port;
    }

  /* Path.  */
  if (cookie->path)
    {
      /* The cookie names its own path; it has to be a prefix of the
         path that was requested.  */
      if (!check_path_match (cookie->path, path))
        {
          DEBUGP (("Attempt to fake the path: %s, %s\n",
                   cookie->path, path));
          delete_cookie (cookie);
          return;
        }
    }
  else
    {
      /* No path given: use the URL path without its file part
         ("/dir/file" becomes "/dir/").  */
      char *trailing_slash = strrchr (path, '/');
      if (trailing_slash)
        cookie->path = strdupdelim (path, trailing_slash + 1);
      else
        /* no slash in the string -- can this even happen? */
        cookie->path = xstrdup (path);
    }

  /* A discard request removes the stored counterpart; the request
     cookie itself is not kept.  */
  if (cookie->discard_requested)
    {
      discard_matching_cookie (jar, cookie);
      delete_cookie (cookie);
      return;
    }

  /* From here on the jar owns COOKIE.  */
  store_cookie (jar, cookie);
}
/* Support for sending out cookies in HTTP requests, based on
   previously stored cookies.  Entry point is `cookie_header'.  */
/* Count the occurrences of CHR in STRING.  The terminating NUL is
   never counted, so the result is 0 when CHR is '\0'.  */

static int
count_char (const char *string, char chr)
{
  int hits = 0;

  while (*string != '\0')
    {
      if (*string == chr)
        ++hits;
      ++string;
    }

  return hits;
}
/* Collect into DEST the cookie chains of JAR whose domains match
   HOST, and return how many were stored.

   A cookie chain is the head of a list of cookies that belong to a
   host/domain.  Given HOST "img.search.xemacs.org", the chains looked
   up are those of "img.search.xemacs.org", "search.xemacs.org", and
   "xemacs.org" -- of which only the ones that actually exist are
   stored.  If HOST is a numeric address, only HOST itself is looked
   up.

   DEST should be large enough to accept (in the worst case) as many
   elements as there are domain components of HOST.  */

static int
find_chains_of_host (struct cookie_jar *jar, const char *host,
                     struct cookie *dest[])
{
  int found = 0;
  int pass, passes;

  /* An empty jar cannot contain any matching chain.  */
  if (hash_table_count (jar->chains) == 0)
    return 0;

  /* A numeric address gets a single, exact lookup.  For a host name
     every subdomain is tried except the top-level (last) one; since
     a name with N components contains N-1 dots, that amounts to one
     pass per dot.  */
  passes = numeric_address_p (host) ? 1 : count_char (host, '.');

  /* Go from the most specific name to less specific ones: given
     fly.srk.fer.hr, try fly.srk.fer.hr, then srk.fer.hr, then
     fer.hr.  The first lookup always takes place.  */
  for (pass = 1; ; pass++)
    {
      struct cookie *chain = hash_table_get (jar->chains, host);
      if (chain != NULL)
        dest[found++] = chain;

      if (pass >= passes)
        break;

      /* Strip the leading component, dot included.  */
      host = strchr (host, '.') + 1;
    }

  return found;
}
/* If FULL_PATH begins with PREFIX, return the length of PREFIX plus
   one; otherwise return zero.  Thanks to the "plus one" an empty
   PREFIX still yields a non-zero (matching) result.  */

static int
path_matches (const char *full_path, const char *prefix)
{
  int prefix_len = strlen (prefix);
  int starts_with = (strncmp (full_path, prefix, prefix_len) == 0);

  /* The longer the matched prefix, the better the quality of the
     match.  */
  return starts_with ? prefix_len + 1 : 0;
}
/* Decide whether COOKIE may be sent with a request described by HOST,
   PORT, PATH, and SECFLAG (true for a secure connection).  Returns
   true iff it may.

   When the cookie matches and PATH_GOODNESS is non-NULL, the "path
   goodness" is stored there: a measure of how closely COOKIE matches
   PATH, used for ordering cookies.  */

static bool
cookie_matches_url (const struct cookie *cookie,
                    const char *host, int port, const char *path,
                    bool secflag, int *path_goodness)
{
  int goodness;

  /* Stale cookies are skipped but left in their chain: Wget is a
     relatively short-lived application, `save_cookies' will not save
     them anyway, and this function should be as efficient as
     possible.  */
  if (cookie_expired_p (cookie))
    return false;

  /* Secure cookies never travel over insecure connections.  */
  if (!secflag && cookie->secure)
    return false;

  /* A cookie bound to a port is valid for that port only.  */
  if (!(cookie->port == PORT_ANY || cookie->port == port))
    return false;

  /* Only cookies that demand an exact domain match are compared with
     HOST.  For the rest the domain is taken to match, because the
     cookie's chain was found by find_chains_of_host.  */
  if (cookie->domain_exact && strcasecmp (host, cookie->domain) != 0)
    return false;

  goodness = path_matches (path, cookie->path);
  if (goodness == 0)
    return false;

  /* Hand the goodness back on request, which saves the caller a
     second call to path_matches().  */
  if (path_goodness != NULL)
    *path_goodness = goodness;

  return true;
}
/* A structure that points to a cookie, along with the additional
   information about the cookie's "goodness".  This allows us to sort
   the cookies when returning them to the server, as required by the
   spec.  */
struct weighed_cookie {
  struct cookie *cookie; /* the cookie being weighed; not owned here */
  int domain_goodness; /* cookie_header stores the length of the
                          cookie's domain here */
  int path_goodness; /* value reported by cookie_matches_url, i.e.
                        the result of path_matches */
};
/* Comparator used for uniquifying the list. */
static int
equality_comparator (const void *p1, const void *p2)
{
struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
/* We only really care whether both name and value are equal. We
return them in this order only for consistency... */
return namecmp ? namecmp : valuecmp;
}
/* Remove duplicate cookies from the first COUNT elements of OUTGOING
   and return the number of elements that remain.  "Duplicate
   cookies" are any two cookies with the same attr name and value; of
   each group of duplicates exactly one survives.  The array is
   sorted with equality_comparator as a side effect.  */

static int
eliminate_dups (struct weighed_cookie *outgoing, int count)
{
  int src;                      /* element being examined */
  int kept = 0;                 /* number of elements retained so far */

  /* Sorting places duplicates next to each other, so every element
     only has to be compared with its immediate successor.  */
  qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);

  for (src = 0; src < count; src++)
    {
      /* Skip an element that equals its successor; the final member
         of each run of equals is the one that is retained.  */
      if (src + 1 < count
          && equality_comparator (&outgoing[src], &outgoing[src + 1]) == 0)
        continue;

      /* Close the gap left by skipped elements, if there is one.  */
      if (kept != src)
        outgoing[kept] = outgoing[src];
      ++kept;
    }

  return kept;
}
/* Comparator used for sorting by quality. */
static int
goodness_comparator (const void *p1, const void *p2)
{
struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
/* Subtractions take `wc2' as the first argument becauase we want a
sort in *decreasing* order of goodness. */
int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
int pgdiff = wc2->path_goodness - wc1->path_goodness;
/* Sort by domain goodness; if these are the same, sort by path
goodness. (The sorting order isn't really specified; maybe it
should be the other way around.) */
return dgdiff ? dgdiff : pgdiff;
}
/* Build the value of a `Cookie' header for a request that goes to
   HOST:PORT and asks for PATH.  SECFLAG is handed to
   cookie_matches_url together with the other request parameters.

   The result has the form "name1=value1; name2=value2" and is
   allocated with xmalloc; the caller is responsible for freeing it.
   NULL is returned when no cookie in JAR pertains to the request,
   i.e. when no cookie header should be generated.  */
char *
cookie_header (struct cookie_jar *jar, const char *host,
               int port, const char *path, bool secflag)
{
  struct cookie **chains;
  struct cookie *ck;
  struct weighed_cookie *outgoing;
  int nchains, nmatch, filled, idx;
  int total, written;
  char *header;

  PREPEND_SLASH (path);         /* see cookie_handle_set_cookie */

  /* Collect the cookie chains whose domains match HOST.  There can be
     at most one chain per subdomain level, hence 1+<number of dots>
     slots for find_chains_of_host to write to.  */
  chains = alloca_array (struct cookie *, 1 + count_char (host, '.'));
  nchains = find_chains_of_host (jar, host, chains);
  if (nchains == 0)
    return NULL;                /* no cookies for this host */

  cookies_now = time (NULL);

  /* First pass: count the cookies that cookie_matches_url accepts for
     this request, so that the array below can be sized exactly.  */
  nmatch = 0;
  for (idx = 0; idx < nchains; idx++)
    for (ck = chains[idx]; ck != NULL; ck = ck->next)
      if (cookie_matches_url (ck, host, port, path, secflag, NULL))
        nmatch++;

  if (nmatch == 0)
    return NULL;                /* no cookies matched */

  /* Second pass: record each matching cookie together with its
     domain and path goodness.  */
  outgoing = alloca_array (struct weighed_cookie, nmatch);
  filled = 0;
  for (idx = 0; idx < nchains; idx++)
    for (ck = chains[idx]; ck != NULL; ck = ck->next)
      {
        int path_score;
        if (cookie_matches_url (ck, host, port, path, secflag, &path_score))
          {
            outgoing[filled].cookie = ck;
            outgoing[filled].domain_goodness = strlen (ck->domain);
            outgoing[filled].path_goodness = path_score;
            filled++;
          }
      }
  assert (filled == nmatch);

  /* Drop duplicates, i.e. cookies whose name and value are the
     same.  */
  nmatch = eliminate_dups (outgoing, nmatch);

  /* Best-matching domains first; within one domain, best-matching
     paths first.  */
  qsort (outgoing, nmatch, sizeof (struct weighed_cookie),
         goodness_comparator);

  /* Size of the output buffer:
       "; " separators  -- (nmatch - 1) * 2
       \0 terminator    -- 1
       name=value pairs -- added in the loop below  */
  total = (nmatch - 1) * 2 + 1;
  for (idx = 0; idx < nmatch; idx++)
    {
      struct cookie *c = outgoing[idx].cookie;
      total += strlen (c->attr) + 1 + strlen (c->value);
    }

  header = xmalloc (total);
  written = 0;
  for (idx = 0; idx < nmatch; idx++)
    {
      struct cookie *c = outgoing[idx].cookie;
      int name_len = strlen (c->attr);
      int value_len = strlen (c->value);

      /* Every pair except the first is preceded by a separator.  */
      if (idx > 0)
        {
          header[written++] = ';';
          header[written++] = ' ';
        }

      memcpy (header + written, c->attr, name_len);
      written += name_len;
      header[written++] = '=';
      memcpy (header + written, c->value, value_len);
      written += value_len;
    }
  header[written++] = '\0';
  assert (written == total);

  return header;
}
/* Support for loading and saving cookies. The format used for
loading and saving should be the format of the `cookies.txt' file
used by Netscape and Mozilla, at least the Unix versions.
(Apparently IE can export cookies in that format as well.) The
format goes like this:
DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
DOMAIN -- cookie domain, optionally followed by :PORT
DOMAIN-FLAG -- whether all hosts in the domain match
PATH -- cookie path
SECURE-FLAG -- whether cookie requires secure connection
TIMESTAMP -- expiry timestamp, number of seconds since epoch
ATTR-NAME -- name of the cookie attribute
ATTR-VALUE -- value of the cookie attribute (empty if absent)
The fields are separated by TABs. All fields are mandatory, except
for ATTR-VALUE. The `-FLAG' fields are boolean, their legal values
being "TRUE" and "FALSE". Empty lines, lines consisting of
whitespace only, and comment lines (beginning with # optionally
preceded by whitespace) are ignored.
Example line from cookies.txt (split in two lines for readability):
.google.com TRUE / FALSE 2147368447 \
PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
*/
/* If the region [DOMAIN_B, DOMAIN_E) contains a `:' that is followed
   only by decimal digits up to DOMAIN_E, parse those digits as a port
   number, store the location of the `:' (the new end of the domain
   proper) to *DOMAIN_E_PTR, and return the port.

   Return 0 and leave *DOMAIN_E_PTR untouched if there is no `:', if
   anything other than digits follows it, or if the number exceeds
   65535, the largest valid port.  Bounding the value also keeps the
   accumulation from overflowing `int' when an overlong digit string
   is read from a cookies file.

   A `:' with nothing after it also yields 0, but in that case
   *DOMAIN_E_PTR is still moved back to the `:'.  */
static int
domain_port (const char *domain_b, const char *domain_e,
             const char **domain_e_ptr)
{
  enum { MAX_PORT = 65535 };
  int port = 0;
  const char *p;
  const char *colon = memchr (domain_b, ':', domain_e - domain_b);

  if (!colon)
    return 0;

  /* Digits are recognized by their plain character range, which is
     what the `*p - '0'' conversion below relies on anyway.  */
  for (p = colon + 1; p < domain_e && *p >= '0' && *p <= '9'; p++)
    {
      /* PORT is at most MAX_PORT here, so this cannot overflow.  */
      port = 10 * port + (*p - '0');
      if (port > MAX_PORT)
        /* Not a valid port number; treat it like garbage.  */
        return 0;
    }

  if (p < domain_e)
    /* Garbage following port number.  */
    return 0;

  *domain_e_ptr = colon;
  return port;
}
/* Scan one TAB-terminated field of a NUL-terminated string, starting
   at P.  On success B points to the first character of the field, E
   to the TAB that ends it, and P is advanced just past that TAB.

   If the field is empty, or the string ends before a TAB is found,
   control jumps to the label `next', which must therefore exist in
   the function that uses the macro.  (B, E and P have already been
   assigned by then.)

   The arguments are evaluated more than once and are assigned to, so
   they must be plain lvalues without side effects.  */
#define GET_WORD(p, b, e) do { \
b = p; \
while (*p && *p != '\t') \
++p; \
e = p; \
if (b == e || !*p) \
goto next; \
++p; \
} while (0)
/* Load cookies from FILE, written in the cookies.txt format described
   above, and store them in JAR.

   If FILE cannot be opened, a message is logged and nothing is
   loaded.  Empty lines, whitespace-only lines and comment lines are
   skipped, as are lines where any of the first six fields is empty or
   is not terminated by a TAB.  A cookie whose expiry timestamp is
   nonzero and lies in the past is discarded; a timestamp of 0 loads
   the cookie as a session (non-permanent) cookie.  */
void
cookie_jar_load (struct cookie_jar *jar, const char *file)
{
  char *line;
  FILE *fp = fopen (file, "r");

  if (fp == NULL)
    {
      logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
                 file, strerror (errno));
      return;
    }

  cookies_now = time (NULL);

  while ((line = read_whole_line (fp)) != NULL)
    {
      struct cookie *cookie;
      char *cursor = line;
      double expires_at;
      int port;

      char *domain_b = NULL, *domain_e = NULL;
      char *domflag_b = NULL, *domflag_e = NULL;
      char *path_b = NULL, *path_e = NULL;
      char *secure_b = NULL, *secure_e = NULL;
      char *expires_b = NULL, *expires_e = NULL;
      char *name_b = NULL, *name_e = NULL;
      char *value_b = NULL, *value_e = NULL;

      /* Step over leading white-space, then ignore the line if
         nothing but a comment (or nothing at all) is left.  */
      while (*cursor && ISSPACE (*cursor))
        ++cursor;
      if (*cursor == '\0' || *cursor == '#')
        goto next;

      /* Each GET_WORD jumps to `next' if its field is malformed.  */
      GET_WORD (cursor, domain_b, domain_e);
      GET_WORD (cursor, domflag_b, domflag_e);
      GET_WORD (cursor, path_b, path_e);
      GET_WORD (cursor, secure_b, secure_e);
      GET_WORD (cursor, expires_b, expires_e);
      GET_WORD (cursor, name_b, name_e);

      /* The value is the rest of the line, so GET_WORD cannot be
         used: it ends with a newline, not a TAB.  Strip a trailing
         "\n" and then a trailing "\r", if present.  */
      value_b = cursor;
      value_e = cursor + strlen (cursor);
      if (value_e > value_b && value_e[-1] == '\n')
        --value_e;
      if (value_e > value_b && value_e[-1] == '\r')
        --value_e;
      /* Empty values are legal (I think), so don't bother checking.  */

      cookie = cookie_new ();

      cookie->attr = strdupdelim (name_b, name_e);
      cookie->value = strdupdelim (value_b, value_e);
      cookie->path = strdupdelim (path_b, path_e);
      cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");

      /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
         value indicating if all machines within a given domain can
         access the variable.  This value is set automatically by the
         browser, depending on the value set for the domain."  */
      cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");

      /* The domain may carry a trailing :PORT, which domain_port
         splits off by moving DOMAIN_E back to the colon.  */
      port = domain_port (domain_b, domain_e, (const char **)&domain_e);
      if (port)
        cookie->port = port;

      /* A leading dot is not kept internally.  */
      if (*domain_b == '.')
        ++domain_b;
      cookie->domain = strdupdelim (domain_b, domain_e);

      /* Safe default in case the EXPIRES field is garbled: a time
         that has already passed.  */
      expires_at = (double)cookies_now - 1;

      /* Terminate the field in place so that sscanf stops there.
         Changing the line is safe here because it is malloced.  */
      *expires_e = '\0';
      sscanf (expires_b, "%lf", &expires_at);

      /* EXPIRY can be 0 for session cookies saved because the user
         specified `--keep-session-cookies' in the past.  They remain
         session cookies, and will be saved only if the user has
         specified `keep-session-cookies' again.  Anything else is a
         real timestamp.  */
      if (expires_at != 0)
        {
          if (expires_at < cookies_now)
            {
              /* Ignore stale cookie.  */
              delete_cookie (cookie);
              goto next;
            }
          cookie->expiry_time = expires_at;
          cookie->permanent = 1;
        }

      store_cookie (jar, cookie);

    next:
      xfree (line);
    }

  fclose (fp);
}
/* Write the cookies in JAR to FILE, one per line, in the format
   described above, preceded by a short comment header.

   Session (non-permanent) cookies are written only when
   opt.keep_session_cookies is set, and expired cookies are never
   written.  Failure to open, write or close FILE is reported through
   logprintf.  */
void
cookie_jar_save (struct cookie_jar *jar, const char *file)
{
  FILE *fp;
  hash_table_iterator iter;

  DEBUGP (("Saving cookies to %s.\n", file));

  cookies_now = time (NULL);

  fp = fopen (file, "w");
  if (fp == NULL)
    {
      logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
                 file, strerror (errno));
      return;
    }

  fputs ("# HTTP cookie file.\n", fp);
  fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (cookies_now));
  fputs ("# Edit at your own risk.\n\n", fp);

  /* The chains table maps a domain (the key) to the list of cookies
     stored for it (the value).  */
  hash_table_iterate (jar->chains, &iter);
  while (hash_table_iter_next (&iter))
    {
      const char *domain = iter.key;
      struct cookie *cookie;

      for (cookie = iter.value; cookie != NULL; cookie = cookie->next)
        {
          const char *domflag;
          const char *secflag;

          if (!(cookie->permanent || opt.keep_session_cookies))
            continue;
          if (cookie_expired_p (cookie))
            continue;

          /* DOMAIN field: a leading dot for cookies that are not
             restricted to the exact host, then the domain, then the
             port if the cookie is tied to one.  */
          if (!cookie->domain_exact)
            fputc ('.', fp);
          fputs (domain, fp);
          if (cookie->port != PORT_ANY)
            fprintf (fp, ":%d", cookie->port);

          domflag = cookie->domain_exact ? "FALSE" : "TRUE";
          secflag = cookie->secure ? "TRUE" : "FALSE";
          fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
                   domflag, cookie->path, secflag,
                   (double)cookie->expiry_time,
                   cookie->attr, cookie->value);

          /* Stop at the first write error; it is reported below.  */
          if (ferror (fp))
            goto out;
        }
    }

 out:
  if (ferror (fp))
    logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"),
               file, strerror (errno));
  if (fclose (fp) < 0)
    logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
               file, strerror (errno));

  DEBUGP (("Done saving cookies.\n"));
}
/* Clean up cookie-related data. */
void
cookie_jar_delete (struct cookie_jar *jar)
{
/* Iterate over chains (indexed by domain) and free them. */
hash_table_iterator iter;
for (hash_table_iterate (jar->chains, &iter); hash_table_iter_next (&iter); )
{
struct cookie *chain = iter.value;
xfree (iter.key);
/* Then all cookies in this chain. */
while (chain)
{
struct cookie *next = chain->next;
delete_cookie (chain);
chain = next;
}
}
hash_table_destroy (jar->chains);
xfree (jar);
}
/* Test cases.  Currently this only tests parse_set_cookie and
   extract_param.  To use, recompile Wget with -DTEST_COOKIES and call
   test_cookies() from main.

   Problems are reported on standard output; nothing is printed when
   all tests pass.  */
#ifdef TEST_COOKIES
void
test_cookies (void)
{
  /* Tests expected to succeed.  RESULTS lists the expected
     name/value pairs in order, terminated by NULL.  */
  static struct {
    const char *data;
    const char *results[10];
  } tests_succ[] = {
    { "arg=value", {"arg", "value", NULL} },
    { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=value1;  arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=value1;  arg2=value2;  ", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
    { "arg=", {"arg", "", NULL} },
    { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
    { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
  };

  /* Tests expected to fail: */
  static char *tests_fail[] = {
    ";",
    "arg=\"unterminated",
    "=empty-name",
    "arg1=;=another-empty-name",
  };
  int i;

  for (i = 0; i < countof (tests_succ); i++)
    {
      const char *data = tests_succ[i].data;
      const char **expected = tests_succ[i].results;
      struct cookie *c;

      c = parse_set_cookie (data, true);
      if (!c)
        {
          printf ("NULL cookie returned for valid data: %s\n", data);
          continue;
        }
      /* Only the fact that parsing succeeded matters; release the
         cookie right away so that it is not leaked.  */
      delete_cookie (c);

      /* Test whether extract_param handles these cases correctly. */
      {
        param_token name, value;
        const char *ptr = data;
        int j = 0;

        while (extract_param (&ptr, &name, &value, ';'))
          {
            char *n, *v;

            /* Check for surplus parameters before allocating the
               copies, so that nothing is leaked on the way out.  */
            if (!expected[j])
              {
                printf ("Too many parameters for '%s'\n", data);
                break;
              }

            n = strdupdelim (name.b, name.e);
            v = strdupdelim (value.b, value.e);
            if (0 != strcmp (expected[j], n))
              printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
                      j / 2 + 1, data, expected[j], n);
            if (0 != strcmp (expected[j + 1], v))
              printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
                      j / 2 + 1, data, expected[j + 1], v);
            j += 2;
            xfree (n);
            xfree (v);
          }
        if (expected[j])
          printf ("Too few parameters for '%s'\n", data);
      }
    }

  for (i = 0; i < countof (tests_fail); i++)
    {
      struct cookie *c;
      char *data = tests_fail[i];

      c = parse_set_cookie (data, true);
      if (c)
        {
          printf ("Failed to report error on invalid data: %s\n", data);
          /* The unexpected cookie still has to be released.  */
          delete_cookie (c);
        }
    }
}
#endif /* TEST_COOKIES */
|