OXIESEC PANEL
- Current Dir:
/
/
opt
/
gsutil
/
third_party
/
pyparsing
/
examples
Server IP: 2a02:4780:11:1594:0:ef5:22d7:a
Upload:
Create Dir:
Name
Size
Modified
Perms
📁
..
-
12/11/2024 09:39:44 AM
rwxr-xr-x
📄
0README.html
10.75 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
AcManForm.dfm
38.22 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
LAparser.py
20.62 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
Setup.ini
1.41 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
SimpleCalc.py
3.58 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
SingleForm.dfm
42.7 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
TAP.py
7.42 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
__init__.py
0 bytes
08/25/2024 02:43:42 PM
rw-r--r--
📄
adventureEngine.py
21.59 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
antlr_grammar.py
10.88 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
antlr_grammar_tests.py
2.89 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
apicheck.py
2.2 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
bf.py
4.15 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
bigquery_view_parser.py
62.51 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
booleansearchparser.py
15.26 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
btpyparse.py
4.16 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
builtin_parse_action_demo.py
817 bytes
08/25/2024 02:43:42 PM
rw-r--r--
📄
cLibHeader.py
853 bytes
08/25/2024 02:43:42 PM
rw-r--r--
📄
chemical_formulas.py
3.17 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
commasep.py
707 bytes
08/25/2024 02:43:42 PM
rw-r--r--
📄
configParse.py
2 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
cpp_enum_parser.py
1.35 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
cuneiform_python.py
2.57 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
datetime_parse_actions.py
1.9 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
decaf_parser.py
6.87 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
delta_time.py
21.49 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
dfmparse.py
7.05 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
dhcpd_leases_parser.py
2.98 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
dictExample.py
1.73 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
dictExample2.py
2.1 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
directx_x_file_parser.py
5.54 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
ebnf.py
4.15 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
ebnftest.py
2.35 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
email_address_parser.py
1.18 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
eval_arith.py
6.15 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
excel_expr.py
2.2 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
fourFn.py
10.27 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
gen_ctypes.py
5.28 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
getNTPserversNew.py
1.11 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
greeting.py
518 bytes
08/25/2024 02:43:42 PM
rw-r--r--
📄
greetingInGreek.py
425 bytes
08/25/2024 02:43:42 PM
rw-r--r--
📄
greetingInKorean.py
495 bytes
08/25/2024 02:43:42 PM
rw-r--r--
📄
groupUsingListAllMatches.py
509 bytes
08/25/2024 02:43:42 PM
rw-r--r--
📄
hola_mundo.py
2.02 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
html_stripper.py
1.68 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
html_table_parser.py
2.16 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
httpServerLogParser.py
3.48 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
idlParse.py
7.53 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
include_preprocessor.py
2.63 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
indentedGrammarExample.py
1022 bytes
08/25/2024 02:43:42 PM
rw-r--r--
📄
indented_block_example.py
712 bytes
08/25/2024 02:43:42 PM
rw-r--r--
📄
inv_regex.py
8.08 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
javascript_grammar.g
18.1 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
jsonParser.py
4.7 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
left_recursion.py
1.41 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
lineno_example.py
1.49 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
listAllMatches.py
1.57 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
lua_parser.py
7.96 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
lucene_grammar.py
8.46 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
macro_expander.py
1.77 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
make_diagram.py
1.62 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
matchPreviousDemo.py
586 bytes
08/25/2024 02:43:42 PM
rw-r--r--
📄
mongodb_query_expression.py
8.46 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
mozilla.ics
752 bytes
08/25/2024 02:43:42 PM
rw-r--r--
📄
mozillaCalendarParser.py
2.71 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
nested.py
579 bytes
08/25/2024 02:43:42 PM
rw-r--r--
📄
nested_markup.py
1.85 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
number_words.py
3.77 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
numerics.py
1.34 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
oc.py
4.34 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
one_to_ninety_nine.py
1.88 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
parseTabularData.py
1.68 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
parse_python_value.py
2.5 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
parse_results_sum_example.py
853 bytes
08/25/2024 02:43:42 PM
rw-r--r--
📄
partial_gene_match.py
2.33 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
pgn.py
3.34 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
position.py
2.17 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
protobuf_parser.py
3.89 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
pythonGrammarParser.py
8.08 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
railroad_diagram_demo.py
1.55 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
range_check.py
2.84 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
readJson.py
120.73 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
removeLineBreaks.py
1.8 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
roman_numerals.py
3.57 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
rosettacode.py
7.43 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
scanExamples.py
2.36 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
searchParserAppDemo.py
962 bytes
08/25/2024 02:43:42 PM
rw-r--r--
📄
searchparser.py
10.4 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
select_parser.py
8.32 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
sexpParser.py
5.23 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
shapes.py
1.69 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
simpleArith.py
2.42 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
simpleBool.py
3.19 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
simpleSQL.py
3.16 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
simpleWiki.py
1.08 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
snmp_api.h
28.69 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
sql2dot.py
3.07 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
stackish.py
2.71 KB
08/25/2024 02:43:42 PM
rw-r--r--
📁
statemachine
-
08/25/2024 02:43:42 PM
rwxr-xr-x
📄
tag_emitter.py
1.17 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
tag_metadata.py
805 bytes
08/25/2024 02:43:42 PM
rw-r--r--
📄
test_bibparse.py
8.72 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
unicode_denormalizer.py
5.02 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
urlExtractor.py
1.03 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
urlExtractorNew.py
2.4 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
verilog_parse.py
31.15 KB
08/25/2024 02:43:42 PM
rw-r--r--
📄
withAttribute.py
912 bytes
08/25/2024 02:43:42 PM
rw-r--r--
📄
wordsToNum.py
2.92 KB
08/25/2024 02:43:42 PM
rw-r--r--
Editing: urlExtractorNew.py
Close
# URL extractor
# Copyright 2004, Paul McGuire
from collections import Counter
import pprint
from urllib.request import urlopen

from pyparsing import makeHTMLTags, pyparsing_common as ppc, FollowedBy, trace_parse_action

# Root of the site being scanned. It is used both to fetch the page and to
# expand site-relative hrefs, so the two can never drift apart.
SITE_ROOT = "https://www.cnn.com"

# Define the pyparsing grammar for a URL, that is:
#    URLlink ::= <a href= URL>linkText</a>
#    URL ::= doubleQuotedString | alphanumericWordPath
# Note that whitespace may appear just about anywhere in the link.  Note also
# that it is not necessary to explicitly show this in the pyparsing grammar; by default,
# pyparsing skips over whitespace between tokens.
linkOpenTag, linkCloseTag = makeHTMLTags("a")
link = linkOpenTag + linkOpenTag.tag_body("body") + linkCloseTag.suppress()


# Add a parse action to expand relative URLs
def expand_relative_url(t):
    """Rewrite the ``href`` of a matched link to an absolute URL, in place.

    - hrefs starting with ``//`` get ``https:`` prepended;
    - hrefs starting with ``/``, ``?`` or ``#`` get ``SITE_ROOT`` prepended;
    - any other href is stored back unchanged.

    Args:
        t: the tokens of one matched link; read via ``t.href`` and updated
            via ``t["href"]``.

    Returns:
        None. The tokens are modified in place.
    """
    url = t.href
    # "//" must be tested before "/": a protocol-relative URL also starts
    # with a single slash.
    if url.startswith("//"):
        url = "https:" + url
    elif url.startswith(("/", "?", "#")):
        url = SITE_ROOT + url

    # Put modified URL back into input tokens
    t["href"] = url


link.add_parse_action(expand_relative_url)

# Go get some HTML with some links in it.
with urlopen(SITE_ROOT + "/") as serverListPage:
    # Honor the charset declared in the Content-Type header; fall back to
    # UTF-8 (the previous hard-wired behavior) when none is declared.
    charset = serverListPage.headers.get_content_charset() or "utf-8"
    htmlText = serverListPage.read().decode(charset)

# scanString is a generator that loops through the input htmlText, and for each
# match yields the tokens and start and end locations (for this application, we are
# not interested in the start and end values).  The matches are collected once
# and reused below, so the page is not scanned twice.
matches = [toks for toks, _, _ in link.scanString(htmlText)]
for toks in matches:
    print(toks.startA.href, "->", toks.body)

# Create dictionary with a dict comprehension, assembled from each pair of tokens returned
# from a matched URL.  Links sharing the same body text collapse to the last one seen.
links = {toks.body: toks.href for toks in matches}
pprint.pprint(links)

# Parse the urls in the links using pyparsing_common.url, and tally up all
# the different domains in a Counter.
# NOTE(review): parseString raises if an href is not matched by ppc.url
# (presumably e.g. "mailto:" or "javascript:" links) - confirm whether such
# links should be skipped instead of aborting the run.
domains = Counter()
for url in links.values():
    print(url)
    parsed = ppc.url.parseString(url)
    # print parsed fields for each new url
    if parsed.host not in domains:
        print(parsed.dump())
        print()

    # update domain counter
    domains[parsed.host] += 1

# Print out a little table of all the domains in the urls
# default=0 keeps max() from raising ValueError when no links were found.
max_domain_len = max((len(d) for d in domains), default=0)
print()
print("{:{}s} {}".format("Domain", max_domain_len, "Count"))
print("{:=<{}} {:=<5}".format("", max_domain_len, ""))
for domain, count in domains.most_common():
    print("{:{}s} {:5d}".format(domain, max_domain_len, count))