我是靠谱客的博主动人飞鸟。最近开发中收集的这篇文章主要介绍《Python table 处理:用 Python 处理 HTML 的 table 标签》,觉得挺不错的,现在分享给大家,希望可以做个参考。

概述

import sys

import csv

import urllib2

import BeautifulSoup

# printtab.py -- print the first <table> of an HTML file as bare table markup.
#
# Usage: python printtab.py PAGE.html
#
# Written for Python 2 with BeautifulSoup 3 (the `BeautifulSoup` module
# imported above). Single-argument print(...) is used so the statements are
# valid under both the Python 2 print statement and the Python 3 function.
#
# NOTE(review): the published listing lost its indentation and the HTML tags
# inside its string literals. The nesting and the <table>/<tr>/<td> literals
# below are reconstructed from the order of the surviving print statements
# (three opening prints, the cell text, three closing prints, then `break`).
# Confirm against the original script if it is available.

# Read the page from disk; the context manager closes the file handle, which
# the original one-liner left open.
with open(sys.argv[1]) as page:
    soup = BeautifulSoup.BeautifulSoup(page.read())

for table in soup.findAll('table'):
    print("<table>")
    for row in table.findAll('tr'):
        print("<tr>")
        for cell in row.findAll('td'):
            print("<td>")
            # Encode explicitly so non-ASCII cell text survives being piped
            # or redirected, where Python 2 would otherwise fall back to ASCII.
            print(cell.text.encode("utf-8"))
            print("</td>")
        print("</tr>")
    print("</table>")
    # Only the first table of the page is emitted.
    # NOTE(review): the depth of this `break` is inferred; it is placed in
    # the outermost loop because it follows the closing table print.
    break

#!/bin/bash

#process.h
# (The second script in this file is named process2.sh, so this one is
# presumably process.sh.)
#
# Usage: <this script> PAGE.html
#
# Builds $basedir/baobei.html next to the input page $1: it appends a product
# name and a price taken from $1, then the output of ./printtab.py for $1,
# and finally rewrites the file through two `tr -d` filters.
#
# NOTE(review): this listing is damaged. The arguments of most `echo`
# commands and parts of the grep patterns are missing (they presumably were
# HTML tags that were stripped when the article was published), and
# backslashes appear to have been dropped as well. The code below is kept
# exactly as published; do not expect it to run as-is.

basedir=$(dirname $1)

echo $basedir

#echo

\ >> $basedir/baobei.html

# Product name: first grep the page, then keep the text between the first
# '>' and the following '<', then keep what precedes the first '_'.
# NOTE(review): the grep pattern starts with two empty lines where text was
# lost. As written, the unquoted `>` and `<` after `cut -d` are parsed by the
# shell as redirections to/from a file named `-f`; they were presumably
# escaped (`\>` and `\<`) in the original.
prodname=$(grep -o '

.*' $1 | cut -d > -f 2 | cut -d < -f 1)

prodname=$(echo $prodname | cut -d _ -f 1)

# Price: same grep/cut pipeline as above.
# NOTE(review): only `[0-9]*` survives of the pattern; whatever preceded it
# cannot be recovered from this listing.
price=$(grep -o '[0-9]*' $1 | cut -d > -f 2 | cut -d < -f 1)

# The following echo/redirect pairs append to $basedir/baobei.html. Only the
# words `Name`, `$prodname`, `Price` and `$price` survive; the rest of each
# echo argument is missing.
echo

echo

>> $basedir/baobei.html

echo

Name >> $basedir/baobei.html

echo

$prodname >> $basedir/baobei.html

echo

>> $basedir/baobei.html

echo

>> $basedir/baobei.html

echo

Price >> $basedir/baobei.html

echo

$price >> $basedir/baobei.html

echo

>> $basedir/baobei.html

# Append the table markup that printtab.py prints for the same page.
python ./printtab.py $1 >> $basedir/baobei.html

echo

>> $basedir/baobei.html

# Take the first line of $basedir/imglist; when it is empty the generated
# file is removed and the script exits.
imgsrc=$(head -n 1 $basedir/imglist)

if test y$imgsrc = y; then

rm -rf $basedir/baobei.html && exit;

fi

# Appends the literal text `%5c` (the URL-encoded form of a backslash).
# NOTE(review): the intent is not visible here.
echo %5c >> $basedir/baobei.html

# Rewrite the file through a temporary copy, deleting characters on the way.
# As written this deletes every letter `n` and every double quote.
# NOTE(review): `tr -d 'n'` was presumably `tr -d '\n'` (join all lines)
# before the backslash was lost.
cat $basedir/baobei.html | tr -d 'n' | tr -d '"' > $basedir/baobei.html.tmp

mv $basedir/baobei.html.tmp $basedir/baobei.html

#!/bin/bash
#process2.sh
#
# Usage: process2.sh PAGE.html
#
# First stage of the script: pull the product name, the price and a free-text
# category label out of the HTML file $1. The script exits silently when the
# name or the price cannot be found.

basedir=$(dirname "$1")

# cell_after LABEL FILE
# Print the text of the table cell that follows the cell whose text starts
# with LABEL in FILE.
#
# The match is split on '>' and field 4 is taken, then cut at the next '<'.
# For `<td>LABEL</td><td>VALUE</td>` the '>'-separated fields are
# `<td`, `LABEL</td`, `<td`, `VALUE</td`, so field 4 holds VALUE.
#
# NOTE(review): the published listing had lost the tag in front of each label
# (the pattern began with two empty lines) and the quoting of the cut
# delimiters; unquoted, `>` and `<` are parsed by the shell as redirections.
# `<td>` is inferred from the field index 4, which requires exactly one tag
# before the label. Confirm against the original script if available.
cell_after() {
    grep -o "<td>$1.*" "$2" | cut -d '>' -f 4 | cut -d '<' -f 1
}

name=$(cell_after "Name" "$1")
if test "x$name" = "x"; then
    exit
fi

price=$(cell_after "Price" "$1")
if test "x$price" = "x"; then
    exit
fi

# $class is only filled in while it is still empty, so a value already
# present in the environment wins; otherwise the labels are tried in order.
for label in "产品类型" "设备类型" "打印针数"; do
    if test "x$class" = "x"; then
        class=$(cell_after "$label" "$1")
    fi
done

# Fallback category label when the page names none.
if test "x$class" = "x"; then
    class="条形码打印机"
fi

# class_id_for LABEL
# Print the numeric category id for the free-text category LABEL.
#
# Each pattern is a substring test and the first matching arm wins, in the
# same order as the original if/elif chain. A single `case` replaces the
# chain of `echo $class | grep --quiet` tests, so no subprocess is started
# per keyword and LABEL is never word-split or glob-expanded.
class_id_for() {
    case "$1" in
        *票据*|*发票*|*票证*|*存折*)
            echo "536187477" ;;
        # NOTE(review): the classtable further down in this file lists
        # 536187478 as "针式打印机", yet this keyword has always mapped to
        # 536187477. Kept unchanged; confirm which id is intended.
        *针*)
            echo "536187477" ;;
        *灯泡*|*UHE*|*UHP*|*HSCR*)
            echo "536187479" ;;
        *条形码打印机*)
            echo "536187480" ;;
        *证卡打印*)
            echo "536187483" ;;
        *条码*|*扫描*|*阅读*|*采集*|*手持*|*数据终端*)
            echo "536187481" ;;
        *激光*)
            echo "536187484" ;;
        *喷墨*)
            echo "536187486" ;;
        *复印*)
            echo "536187615" ;;
        *一体机*)
            echo "536187485" ;;
        *硒鼓*|*墨盒*)
            echo "536187616" ;;
        # Anything unrecognised falls into the same id as the last group.
        *)
            echo "536187616" ;;
    esac
}

# Replace the free-text label held in $class by its numeric category id.
class=$(class_id_for "$class")

################################################################

# Final stage: locate the product image, copy it under a hash-based name and
# print one record line for the product on standard output.

# Collect every *.jpg (case-insensitive) regular file below $basedir; exit
# silently when there is none.
# NOTE(review): $imagepath is used unquoted below. If find returns more than
# one file, md5sum prints one hash per file and cp receives several sources.
imagepath=$(find $basedir -type f -iname "*.jpg")

if test "x$imagepath" = "x"; then

exit ;

fi

# The image is stored as <md5 of the file>.tbi in $basedir/../../template.
image=$(md5sum $imagepath | cut -d ' ' -f 1)

cp -rf $imagepath $basedir/../../template/$image.tbi

################################################################

# The whole input file $1 becomes the description field of the record.
desc=$(cat $1)

################################################################

# Print the record: $name, $class, $price, $desc and $image interleaved with
# fixed field values.
# NOTE(review): `echo -e` only differs from plain echo when the text contains
# backslash escapes, and none are left here. The "t" separators were
# presumably "\t" (tab) and some of the doubled quotes were presumably \"
# before backslashes were lost in publishing. As written, literal `t`
# characters are printed between the fields. The exact original quoting
# cannot be recovered from this listing, so the line is left untouched.
echo -e "$name""t"110514"t"",$class,""t"1"t""上海""t""上海""t""b""t"$price"t"0.000000"t"1"t"7"t"2"t"0.000000"t"0.000000"t"0.000000"t""t""t"1"t"1"t"0"t"1"t"1"t"0"t""2012-10-16 13:09:48""t""t""$desc""t""t""20000:31140;20196:3228846;29969:107401;30681:32998;31468:102250;31479:92188;3415558:27513;3415563:21959;3415571:21959;3415581:10122;3415609:22041;7884463:75957615;14319244:80897641;14319250:123483713;14791484:10285019;""t""t""t"0"t"0"t""2012-10-16 13:37:51""t"100"t""t"0"t""$image:0:0:|;""t""""t""""t"",""t"",""t""""t""""t"0"t""15758222730""t"15758222730

# Numeric category id (the values process2.sh assigns to `class`) mapped to
# the category's display name. The stray listing line numbers (17-28) that
# had been pasted in front of each entry are removed so the literal parses.
classtable = {
    "536187477": "票据打印机",
    "536187478": "针式打印机",
    "536187479": "投影灯泡",
    "536187480": "条形码打印机",
    "536187481": "条码设备",
    "536187483": "证卡打印机",
    "536187484": "激光打印机",
    "536187485": "多功能一体机",
    "536187486": "喷墨打印机",
    "536187615": "复印复合机",
    "536187616": "硒鼓",
}

最后

以上就是动人飞鸟为你收集整理的《Python table 处理:用 Python 处理 HTML 的 table 标签》的全部内容,希望文章能够帮你解决用 Python 处理 HTML 的 table 标签时所遇到的程序开发问题。

如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。

本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
点赞(34)

评论列表共有 0 条评论

立即
投稿
返回
顶部