python辅助修改试卷（简单操作docx与excel）解决方法

134 阅读 0 评论 89 点赞

我是靠谱客的博主彩色火车，这篇文章主要介绍python辅助修改试卷（简单操作docx与excel）解决方法，现在分享给大家，希望可以做个参考。

有五个章节的word作业需要修改，问题在于每个同学都有自己的风格，如下：

大多数正常的答案（不同的同学可能位置不在图中所示的位置）
不省心的同学1
不省心的同学2
有横线的选项（情况一）
有横线的选项（情况二）
有横线的选项（情况三）
有横线的选项（情况四）
有横线的选项（情况五）

在这里插入图片描述
当然还包括该同学这道题根本没有选答案，列举的情况并不包括所有的情况，故编码起来很繁琐，得同时考虑到很多种情况。

解决方法

一开始我只看到了有横线的情况，所以我的方法是：

方法一

用正则表达式去匹配ABCD答案，这样的方法在有横线的（一、三、四、五）情况中都不满足，只能说是天真了。

# Method 1 pattern: a single answer letter (A-D) that has an underscore
# directly on both sides, e.g. the "_C_" inside "___C_____".
regu_lin_ans_lin = r"_[A-D]_"
pt_l_a_l = re.compile(pattern=regu_lin_ans_lin)

若继续加上匹配 `_[A-D]` 和 `[A-D]_` 的情况，也只是能多匹配一些情况而已，仍然无法覆盖所有写法。

方法二

既然方法一无法覆盖横线的各种写法，那么换个思路：只要在某一行检测到横线，就在该行中查找答案ABCD，所以分为两步：
1、先去检测横线；
2、然后检测当前行是否存在ABCD（可能会存在误差，因为题目中也可能涉及ABCD）


# Method 2 (excerpt): any paragraph that contains an underscore is treated as
# an answer line, and the first capital letter A-D found in it is the answer.
# This is a fragment of a per-paragraph loop body: `para`, `answers`, `index`,
# `is_mark` and `ch2num` are defined outside this excerpt.
regu_ans = '[A-D]'
pt_a = re.compile(regu_ans)

text = para.text
if '_' in text:
    # Every A-D letter in the paragraph, in order of appearance.
    ans = pt_a.findall(text)
    if len(ans) == 0:
        answers.append(-1)                   # blank line without a letter: store -1
        is_mark = True                       # flag the document
        continue
    # Only the first letter is used; letters that belong to the question text
    # itself can therefore be mistaken for the answer.
    ans = ans[0]
    tmp = ch2num(ans)
    answers.append(tmp)
    index += 1

方法三

先检测出：第X题
然后在其后寻找学生填写的答案字母（A–D）；答案字母应当出现在题目给出的“A.xxxxx”选项之前。

这里就涉及到若某一题目根本没有做的情况。

docx文档操作

# Demo 1: try the "_[A-D]_" pattern on a sample question stem.
# The answer blank ("___C_____。") is not part of the string below, so the
# text contains no underscore and the pattern finds nothing: this prints [].
text = "在Excel中，下面对于自定义自动筛选说法中不正确的是"
pt = re.compile(r"_[A-D]_")
lin_ans_lin = pt.findall(text)
print(lin_ans_lin)

# Demo 2: print every paragraph of one submission, prefixed with its 1-based
# paragraph number and a tab.
# The file is only needed while Document() parses it, so it is closed right
# after loading instead of being left open for the rest of the script.
with open('第五章数据管理与分析-171xxxx-xxx.docx', 'rb') as f:
    doc = Document(f)
print(doc)
for index, para in enumerate(doc.paragraphs, start=1):
    print(str(index) + '段\t' + para.text)

excel文档操作

将成绩填入对应的位置

import xlrd
import os
import re
from xlutils.copy import copy

excel_file_path = 'excel/课堂测试成绩.xlsx'

# step 1: open the workbook and make a writable copy of it; every score is
# written to the first sheet of the copy.
# NOTE(review): xlrd 2.x only reads .xls files - confirm that the installed
# xlrd version can open this .xlsx workbook.
data = xlrd.open_workbook(excel_file_path)
book_new = copy(data)
sheet_new = book_new.get_sheet(0)

table = data.sheets()[0]
print(table.nrows)


def _stu_id_key(cell_value):
    """Return the student id held in a cell as a plain digit string."""
    # xlrd returns numeric cells as floats (18120318 -> 18120318.0); such a key
    # would never equal the digit strings parsed from the score files.
    if isinstance(cell_value, float) and cell_value.is_integer():
        return str(int(cell_value))
    return str(cell_value).strip()


# step 2: map student id (first column) -> row number for later look-ups.
# Row 0 is skipped (presumably the header row).
dic_stuid_row_num = {}
for i in range(1, table.nrows):
    dic_stuid_row_num[_stu_id_key(table.row_values(i)[0])] = i

# print(dic_stuid_row_num)

# step 3: walk through the score files and write each score into the sheet.
# Runs of digits: the first one on a line is the student id, the second one
# the number of correct answers.
re_num = r"\d+"
pt_num = re.compile(re_num)

files = ['chapter2.txt', 'chapter3.txt', 'chapter4.txt', 'chapter5.txt']
sub_nums = [32, 45, 30, 30]          # number of questions in each chapter
base_dir = 'files'

for index, (file_name, question_total) in enumerate(zip(files, sub_nums)):
    path = os.path.join(base_dir, file_name)
    with open(path, 'r', encoding='UTF-8') as f:
        for line in f:
            rs = pt_num.findall(line)
            print(rs)

            # Lines without both a student id and a score (blank lines,
            # summary lines, ...) carry nothing to write.
            if len(rs) < 2:
                continue

            stu_id = rs[0]
            score = int(rs[1]) / question_total

            row_num = dic_stuid_row_num.get(stu_id)
            if row_num is not None:
                # The score of the index-th file goes into column index + 3,
                # as a percentage with two decimals.
                sheet_new.write(row_num, index + 3, '%.2f' % (score * 100))


book_new.save('课堂测试成绩.xls')

批改试卷代码

main.py

# 计算正确的答案个数（需事先给出答案）
import os
import re
from python_docx_tutorial.ans_extractor import extract
from python_docx_tutorial.score_counter import sc_count


if __name__ == '__main__':
    result = {}          # student id -> number of correct answers
    marked_file = []     # submissions that need a manual check

    # Answer key, submission folder and question count for each chapter.
    # Key entries: 1-4 stand for A-D, -1 is always credited and -2 is never
    # credited.  The folders are raw strings so that backslash sequences such
    # as "\t" or "\U" in the Windows paths are kept literally.
    chapter_configs = {
        5: {
            'true_answers': [1, 4, 4, 2, 3, 2, 4, 3, 4, 4,
                             2, 3, 3, 4, 3, 3, 1, 1, 3, 4,
                             3, 3, 3, 2, 2, 1, 4, 3, 4, 4],
            'base_dir': r'C:\Users\lenovo02\Documents\WeChat Files\Zipcoder\Files\第五章\学生提交',
            'sub_num': 30,
        },
        4: {
            'true_answers': [2, 1, 4, 2, 4, 1, 4, 2, 1, 4,
                             1, 2, 2, 1, 3, 1, 3, 4, 4, 4,
                             1, 3, 1, 2, 3, 4, 2, 3, 3, 2],
            'base_dir': r'G:\test\课堂测试\第四章\课堂测试-学生提交',
            'sub_num': 30,
        },
        # Chapter 3: multiple-answer and fill-in questions are credited
        # directly (-1); grading them would need too much extra logic.
        3: {
            'true_answers': [-1, 2, -1, 2, -1, -1, 2, 1, 1, 1,
                             -1, 1, 2, 3, 1, 2, -1, 1, -1, -1,
                             1, 2, 2, -1, 1, 1, 1, 1, -1, 1,
                             2, 1, 1, 2, 2, 2, 1, 1, 1, 1,
                             2, 2, 3, -1, 4],
            'base_dir': r'G:\test\课堂测试\第三章\学生提交-课堂测试',
            'sub_num': 45,
        },
        # Chapter 2: same as chapter 3; in addition the last questions have
        # too many options and get no credit at all (-2).
        2: {
            'true_answers': [1, 1, 1, 2, 2, 1, 1, 2, 1, 1,
                             1, 1, 2, 2, 2, 2, 3, 1, 4, 4,
                             -1, -1, -1, -1, -1, -2, -2, -2, -2, -2,
                             -2, -2],
            'base_dir': r'G:\test\课堂测试\第二章\提交',
            'sub_num': 32,
        },
    }
    chapter = 2          # chapter graded in this run
    config = chapter_configs[chapter]
    true_answers = config['true_answers']
    base_dir = config['base_dir']
    sub_num = config['sub_num']

    # All digits of the file name, concatenated, are taken as the student id.
    regu_stu_id = r"[0-9]"
    pt_stu_id = re.compile(regu_stu_id)

    for filename in os.listdir(base_dir):
        pathname = os.path.join(base_dir, filename)
        if os.path.isdir(pathname):
            # The submission sits inside its own folder: use its first entry.
            inner_names = os.listdir(pathname)
            if not inner_names:
                # Empty folder: nothing to grade, leave it for manual review.
                marked_file.append(pathname)
                continue
            file_name = inner_names[0]
            file_path = os.path.join(pathname, file_name)
        else:
            file_name = filename
            file_path = pathname

        stu_id = ''.join(pt_stu_id.findall(file_name))

        # Folder submissions and plain files are graded by the same rules.
        with open(file_path, 'rb') as f:
            try:
                answers, marked = extract(f, sub_num)
            except Exception as e:
                # Best effort on purpose: one unreadable submission must not
                # stop the batch.  Report it and queue it for manual review.
                print('读取失败，已标注：{}（{}）'.format(file_path, e))
                marked_file.append(file_path)
                continue

        if marked:
            # Too few answers were detected; do not record a score.
            marked_file.append(file_path)
            continue
        if len(answers) <= sub_num - 10:
            print('有答案题目少于给定阈值，为{}!'.format(len(answers)))

        # count the score
        score = sc_count(true_answers, answers)
        print('学号：{}，成绩：{}'.format(stu_id, score))
        result[stu_id] = score                          # can be score / sub_num

    # Final report: scores ordered by student id, then the files to review.
    print('有成绩的人数：{}'.format(len(result)))
    print(result)
    for r in sorted(result):
        print('学号：{}， 成绩：{}'.format(r, result[r]))
    print('标注文档数目：{}'.format(len(marked_file)))
    for marked_path in marked_file:
        print(marked_path)

ans_extractor.py

from docx import Document
import re


def ch2num(ch):
    """
    Map an option letter to its 1-based number so answers can be compared
    as integers ('A' -> 1, 'B' -> 2, 'C' -> 3, 'D' -> 4).

    :param ch: option letter, one of 'A', 'B', 'C', 'D'
    :return: the matching integer in 1..4
    :raises KeyError: if ch is not one of the four letters
    """
    letter_to_number = {
        letter: number for number, letter in enumerate('ABCD', start=1)
    }
    return letter_to_number[ch]


def extract(f, total_sub):
    """
    Read one submitted .docx file and pull out the chosen option of every
    question.

    Earlier attempts (matching "_X_" blanks, or taking the first letter on
    any line containing an underscore) failed on irregularly formatted
    submissions, so answers are located relative to the "第N题" headers
    instead; see _scan_answers.

    :param f:       file object of the .docx, already opened in binary mode
                    (its ``name`` attribute is used in the log output)
    :param total_sub: total number of questions in the test
    :return: ``[answers, marked]``. ``answers`` holds one entry per detected
             question (1-4 for A-D, -1 when no answer was found). ``marked``
             is True when fewer than ``total_sub - 10`` answers were detected
             and the document should be checked by hand.
    """
    print('开始读取文档：{}'.format(f.name))
    doc = Document(f)
    answers, index = _scan_answers(para.text for para in doc.paragraphs)

    # A submission is accepted as long as at most 10 answers are missing.
    if index >= total_sub - 10:
        print("抽取完毕，个数为：{}".format(index))
        return [answers, False]

    print("题目数量差太多，检查文档！")
    print('检测到的题目个数为：{}'.format(len(answers)))
    return [answers, True]


def _scan_answers(paragraph_texts):
    """
    Collect the answers found in the paragraph texts of one document.

    A paragraph containing exactly one "第N题" header starts a question. From
    there on, the last A-D letter of the first paragraph that contains one is
    taken as the answer. The search stops at a paragraph that starts with
    "A", i.e. at the option list. A question without an answer gets -1.

    :param paragraph_texts: iterable of paragraph strings in document order
    :return: tuple ``(answers, detected)``, where ``detected`` counts only
             the answers that were really found (the -1 entries excluded)
    """
    pt_sub = re.compile(r"第\d+题")
    pt_a = re.compile('[A-D]')

    answers = []
    detected = 0
    searching = False    # inside a question, before its "A..." option line
    pending = False      # a header was seen and its answer is still missing

    for text in paragraph_texts:
        if text.strip().startswith('A'):
            # Reached the options: letters from here on are not the answer.
            searching = False

        if len(pt_sub.findall(text)) == 1:
            if pending:
                # The previous question ended without an answer; keep the
                # positions aligned with the answer key.
                answers.append(-1)
            searching = True
            pending = True

        if searching:
            letters = pt_a.findall(text)
            if letters:
                # The last letter in the paragraph is the one that is used.
                answers.append(ch2num(letters[-1]))
                searching = False
                pending = False
                detected += 1

    if pending:
        # The final question was left unanswered as well.
        answers.append(-1)
    return answers, detected


if __name__ == '__main__':
    # Manual check on a single submission with 30 questions.
    # Raw string: the backslashes of the Windows path (e.g. "\U", "\1") must
    # be kept literally instead of being read as escape sequences.
    sample_path = r'C:\Users\lenovo02\Documents\WeChat Files\Zipcoder\Files\第五章\学生提交\F110_192.168.117.110\18120318叶宜宁.docx'
    with open(sample_path, 'rb') as file:
        extract(file, 30)

score_counter.py

def sc_count(a, b):
    """
    Count how many of the answers in b earn a point against the key a.

    Positions are compared pairwise. Only positions present in both lists
    are looked at, so a list b that is longer than the key no longer raises
    an IndexError.

    :param a:    answer key; -1 means the question is always credited,
                 -2 means the question is never credited
    :param b:    answers under test; -1 means no answer was detected
    :return:     number of credited questions
    """
    count = 0
    for expected, given in zip(a, b):
        if expected == -1:
            # Credited regardless of what was answered.
            count += 1
        elif expected == -2 or given == -1:
            # Question excluded from scoring, or no answer detected.
            continue
        elif expected == given:
            count += 1
    return count