PATH: //opt/alt/python311/share/doc/alt-python311-pyparsing-doc/examples
FILE_BARU
CREATE
FOLDER_BARU
MKDIR
UPLOAD_FILE
GO
[ .. KEMBALI ]
📄 0README.html
↓
X
📄 AcManForm.dfm
↓
X
📄 LAparser.py
↓
X
📄 Setup.ini
↓
X
📄 SimpleCalc.py
↓
X
📄 SingleForm.dfm
↓
X
📄 TAP.py
↓
X
📄 __init__.py
↓
X
📁 __pycache__/
X
📄 adventureEngine.py
↓
X
📄 antlr_grammar.py
↓
X
📄 antlr_grammar_tests.py
↓
X
📄 apicheck.py
↓
X
📄 bigquery_view_parser.py
↓
X
📄 booleansearchparser.py
↓
X
📄 btpyparse.py
↓
X
📄 builtin_parse_action_demo.py
↓
X
📄 cLibHeader.py
↓
X
📄 chemicalFormulas.py
↓
X
📄 commasep.py
↓
X
📄 configParse.py
↓
X
📄 cpp_enum_parser.py
↓
X
📄 cuneiform_python.py
↓
X
📄 datetimeParseActions.py
↓
X
📄 decaf_parser.py
↓
X
📄 delta_time.py
↓
X
📄 dfmparse.py
↓
X
📄 dhcpd_leases_parser.py
↓
X
📄 dictExample.py
↓
X
📄 dictExample2.py
↓
X
📄 ebnf.py
↓
X
📄 ebnftest.py
↓
X
📄 eval_arith.py
↓
X
📄 excelExpr.py
↓
X
📄 fourFn.py
↓
X
📄 gen_ctypes.py
↓
X
📄 getNTPserversNew.py
↓
X
📄 greeting.py
↓
X
📄 greetingInGreek.py
↓
X
📄 greetingInKorean.py
↓
X
📄 groupUsingListAllMatches.py
↓
X
📄 holaMundo.py
↓
X
📄 htmlStripper.py
↓
X
📄 htmlTableParser.py
↓
X
📄 httpServerLogParser.py
↓
X
📄 idlParse.py
↓
X
📄 include_preprocessor.py
↓
X
📄 indentedGrammarExample.py
↓
X
📄 indented_block_example.py
↓
X
📄 invRegex.py
↓
X
📄 javascript_grammar.g
↓
X
📄 jsonParser.py
↓
X
📄 left_recursion.py
↓
X
📄 linenoExample.py
↓
X
📄 listAllMatches.py
↓
X
📄 lua_parser.py
↓
X
📄 lucene_grammar.py
↓
X
📄 macroExpander.py
↓
X
📄 make_diagram.py
↓
X
📄 matchPreviousDemo.py
↓
X
📄 mozilla.ics
↓
X
📄 mozillaCalendarParser.py
↓
X
📄 nested.py
↓
X
📄 nested_markup.py
↓
X
📄 number_words.py
↓
X
📄 numerics.py
↓
X
📄 oc.py
↓
X
📄 one_to_ninety_nine.py
↓
X
📄 parsePythonValue.py
↓
X
📄 parseResultsSumExample.py
↓
X
📄 parseTabularData.py
↓
X
📄 partial_gene_match.py
↓
X
📄 pgn.py
↓
X
📄 position.py
↓
X
📄 protobuf_parser.py
↓
X
📄 pymicko.py
↓
X
📄 pythonGrammarParser.py
↓
X
📄 railroad_diagram_demo.py
↓
X
📄 rangeCheck.py
↓
X
📄 readJson.py
↓
X
📄 removeLineBreaks.py
↓
X
📄 romanNumerals.py
↓
X
📄 rosettacode.py
↓
X
📄 scanExamples.py
↓
X
📄 searchParserAppDemo.py
↓
X
📄 searchparser.py
↓
X
📄 select_parser.py
↓
X
📄 sexpParser.py
↓
X
📄 shapes.py
↓
X
📄 simpleArith.py
↓
X
📄 simpleBool.py
↓
X
📄 simpleSQL.py
↓
X
📄 simpleWiki.py
↓
X
📄 snmp_api.h
↓
X
📄 sparser.py
↓
X
📄 sql2dot.py
↓
X
📄 stackish.py
↓
X
📁 statemachine/
X
📄 test_bibparse.py
↓
X
📄 unicode_denormalizer.py
↓
X
📄 urlExtractor.py
↓
X
📄 urlExtractorNew.py
↓
X
📁 verilog/
X
📄 verilogParse.py
↓
X
📄 withAttribute.py
↓
X
📄 wordsToNum.py
↓
X
SAVING...
BERHASIL DIUBAH!
EDITING: urlExtractorNew.py
# URL extractor # Copyright 2004, Paul McGuire from collections import Counter import pprint from urllib.request import urlopen from pyparsing import makeHTMLTags, pyparsing_common as ppc, FollowedBy, trace_parse_action # Define the pyparsing grammar for a URL, that is: # URLlink ::= <a href= URL>linkText</a> # URL ::= doubleQuotedString | alphanumericWordPath # Note that whitespace may appear just about anywhere in the link. Note also # that it is not necessary to explicitly show this in the pyparsing grammar; by default, # pyparsing skips over whitespace between tokens. linkOpenTag, linkCloseTag = makeHTMLTags("a") link = linkOpenTag + linkOpenTag.tag_body("body") + linkCloseTag.suppress() # Add a parse action to expand relative URLs def expand_relative_url(t): url = t.href if url.startswith("//"): url = "https:" + url elif url.startswith(("/", "?", "#")): url = "https://www.cnn.com" + url # Put modified URL back into input tokens t["href"] = url link.add_parse_action(expand_relative_url) # Go get some HTML with some links in it. with urlopen("https://www.cnn.com/") as serverListPage: htmlText = serverListPage.read().decode() # scanString is a generator that loops through the input htmlText, and for each # match yields the tokens and start and end locations (for this application, we are # not interested in the start and end values). for toks, strt, end in link.scanString(htmlText): print(toks.startA.href, "->", toks.body) # Create dictionary with a dict comprehension, assembled from each pair of tokens returned # from a matched URL. links = {toks.body: toks.href for toks, _, _ in link.scanString(htmlText)} pprint.pprint(links) # Parse the urls in the links using pyparsing_common.url, and tally up all # the different domains in a Counter. domains = Counter() for url in links.values(): print(url) parsed = ppc.url.parseString(url) # print parsed fields for each new url if parsed.host not in domains: print(parsed.dump()) print() # update domain counter domains[parsed.host] += 1 # Print out a little table of all the domains in the urls max_domain_len = max(len(d) for d in domains) print() print("{:{}s} {}".format("Domain", max_domain_len, "Count")) print("{:=<{}} {:=<5}".format("", max_domain_len, "")) for domain, count in domains.most_common(): print("{:{}s} {:5d}".format(domain, max_domain_len, count))
SIMPAN PERUBAHAN